aboutsummaryrefslogtreecommitdiff
path: root/lib/std/http/HeadParser.zig
diff options
context:
space:
mode:
authorAndrew Kelley <andrew@ziglang.org>2024-02-23 17:41:38 -0800
committerGitHub <noreply@github.com>2024-02-23 17:41:38 -0800
commitcfce81f7d5f11ab93b2d5fd26df41edf967f333b (patch)
tree11e52ad0a44620f4a4519683abd945146c11b312 /lib/std/http/HeadParser.zig
parent7230b68b350b16c637e84f3ff224be24d23214ce (diff)
parent653d4158cdcb20be82ff525e122277064e6acb92 (diff)
downloadzig-cfce81f7d5f11ab93b2d5fd26df41edf967f333b.tar.gz
zig-cfce81f7d5f11ab93b2d5fd26df41edf967f333b.zip
Merge pull request #18955 from ziglang/std.http.Server
take std.http in a different direction
Diffstat (limited to 'lib/std/http/HeadParser.zig')
-rw-r--r--lib/std/http/HeadParser.zig371
1 files changed, 371 insertions, 0 deletions
diff --git a/lib/std/http/HeadParser.zig b/lib/std/http/HeadParser.zig
new file mode 100644
index 0000000000..bb49faa14b
--- /dev/null
+++ b/lib/std/http/HeadParser.zig
@@ -0,0 +1,371 @@
+//! Finds the end of an HTTP head in a stream.
+
+state: State = .start,
+
+pub const State = enum {
+ start,
+ seen_n,
+ seen_r,
+ seen_rn,
+ seen_rnr,
+ finished,
+};
+
+/// Returns the number of bytes consumed by headers. This is always less
+/// than or equal to `bytes.len`.
+///
+/// If the amount returned is less than `bytes.len`, the parser is in a
+/// content state and the first byte of content is located at
+/// `bytes[result]`.
+pub fn feed(p: *HeadParser, bytes: []const u8) usize {
+ const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
+ var index: usize = 0;
+
+ while (true) {
+ switch (p.state) {
+ .finished => return index,
+ .start => switch (bytes.len - index) {
+ 0 => return index,
+ 1 => {
+ switch (bytes[index]) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ return index + 1;
+ },
+ 2 => {
+ const b16 = int16(bytes[index..][0..2]);
+ const b8 = intShift(u8, b16);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ return index + 2;
+ },
+ 3 => {
+ const b24 = int24(bytes[index..][0..3]);
+ const b16 = intShift(u16, b24);
+ const b8 = intShift(u8, b24);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\r\n\r") => p.state = .seen_rnr,
+ else => {},
+ }
+
+ return index + 3;
+ },
+ 4...vector_len - 1 => {
+ const b32 = int32(bytes[index..][0..4]);
+ const b24 = intShift(u24, b32);
+ const b16 = intShift(u16, b32);
+ const b8 = intShift(u8, b32);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\r\n\r") => p.state = .seen_rnr,
+ else => {},
+ }
+
+ switch (b32) {
+ int32("\r\n\r\n") => p.state = .finished,
+ else => {},
+ }
+
+ index += 4;
+ continue;
+ },
+ else => {
+ const chunk = bytes[index..][0..vector_len];
+ const matches = if (use_vectors) matches: {
+ const Vector = @Vector(vector_len, u8);
+ // const BoolVector = @Vector(vector_len, bool);
+ const BitVector = @Vector(vector_len, u1);
+ const SizeVector = @Vector(vector_len, u8);
+
+ const v: Vector = chunk.*;
+ const matches_r: BitVector = @bitCast(v == @as(Vector, @splat('\r')));
+ const matches_n: BitVector = @bitCast(v == @as(Vector, @splat('\n')));
+ const matches_or: SizeVector = matches_r | matches_n;
+
+ break :matches @reduce(.Add, matches_or);
+ } else matches: {
+ var matches: u8 = 0;
+ for (chunk) |byte| switch (byte) {
+ '\r', '\n' => matches += 1,
+ else => {},
+ };
+ break :matches matches;
+ };
+ switch (matches) {
+ 0 => {},
+ 1 => switch (chunk[vector_len - 1]) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ },
+ 2 => {
+ const b16 = int16(chunk[vector_len - 2 ..][0..2]);
+ const b8 = intShift(u8, b16);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+ },
+ 3 => {
+ const b24 = int24(chunk[vector_len - 3 ..][0..3]);
+ const b16 = intShift(u16, b24);
+ const b8 = intShift(u8, b24);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\r\n\r") => p.state = .seen_rnr,
+ else => {},
+ }
+ },
+ 4...vector_len => {
+ inline for (0..vector_len - 3) |i_usize| {
+ const i = @as(u32, @truncate(i_usize));
+
+ const b32 = int32(chunk[i..][0..4]);
+ const b16 = intShift(u16, b32);
+
+ if (b32 == int32("\r\n\r\n")) {
+ p.state = .finished;
+ return index + i + 4;
+ } else if (b16 == int16("\n\n")) {
+ p.state = .finished;
+ return index + i + 2;
+ }
+ }
+
+ const b24 = int24(chunk[vector_len - 3 ..][0..3]);
+ const b16 = intShift(u16, b24);
+ const b8 = intShift(u8, b24);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\r\n\r") => p.state = .seen_rnr,
+ else => {},
+ }
+ },
+ else => unreachable,
+ }
+
+ index += vector_len;
+ continue;
+ },
+ },
+ .seen_n => switch (bytes.len - index) {
+ 0 => return index,
+ else => {
+ switch (bytes[index]) {
+ '\n' => p.state = .finished,
+ else => p.state = .start,
+ }
+
+ index += 1;
+ continue;
+ },
+ },
+ .seen_r => switch (bytes.len - index) {
+ 0 => return index,
+ 1 => {
+ switch (bytes[index]) {
+ '\n' => p.state = .seen_rn,
+ '\r' => p.state = .seen_r,
+ else => p.state = .start,
+ }
+
+ return index + 1;
+ },
+ 2 => {
+ const b16 = int16(bytes[index..][0..2]);
+ const b8 = intShift(u8, b16);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_rn,
+ else => p.state = .start,
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\r") => p.state = .seen_rnr,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ return index + 2;
+ },
+ else => {
+ const b24 = int24(bytes[index..][0..3]);
+ const b16 = intShift(u16, b24);
+ const b8 = intShift(u8, b24);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => p.state = .start,
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\n\r\n") => p.state = .finished,
+ else => {},
+ }
+
+ index += 3;
+ continue;
+ },
+ },
+ .seen_rn => switch (bytes.len - index) {
+ 0 => return index,
+ 1 => {
+ switch (bytes[index]) {
+ '\r' => p.state = .seen_rnr,
+ '\n' => p.state = .seen_n,
+ else => p.state = .start,
+ }
+
+ return index + 1;
+ },
+ else => {
+ const b16 = int16(bytes[index..][0..2]);
+ const b8 = intShift(u8, b16);
+
+ switch (b8) {
+ '\r' => p.state = .seen_rnr,
+ '\n' => p.state = .seen_n,
+ else => p.state = .start,
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .finished,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ index += 2;
+ continue;
+ },
+ },
+ .seen_rnr => switch (bytes.len - index) {
+ 0 => return index,
+ else => {
+ switch (bytes[index]) {
+ '\n' => p.state = .finished,
+ else => p.state = .start,
+ }
+
+ index += 1;
+ continue;
+ },
+ },
+ }
+
+ return index;
+ }
+}
+
+inline fn int16(array: *const [2]u8) u16 {
+ return @bitCast(array.*);
+}
+
+inline fn int24(array: *const [3]u8) u24 {
+ return @bitCast(array.*);
+}
+
+inline fn int32(array: *const [4]u8) u32 {
+ return @bitCast(array.*);
+}
+
+inline fn intShift(comptime T: type, x: anytype) T {
+ switch (@import("builtin").cpu.arch.endian()) {
+ .little => return @truncate(x >> (@bitSizeOf(@TypeOf(x)) - @bitSizeOf(T))),
+ .big => return @truncate(x),
+ }
+}
+
+const HeadParser = @This();
+const std = @import("std");
+const use_vectors = builtin.zig_backend != .stage2_x86_64;
+const builtin = @import("builtin");
+
+test feed {
+ const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\nHello";
+
+ for (0..36) |i| {
+ var p: HeadParser = .{};
+ try std.testing.expectEqual(i, p.feed(data[0..i]));
+ try std.testing.expectEqual(35 - i, p.feed(data[i..]));
+ }
+}