diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2024-02-23 17:41:38 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-23 17:41:38 -0800 |
| commit | cfce81f7d5f11ab93b2d5fd26df41edf967f333b (patch) | |
| tree | 11e52ad0a44620f4a4519683abd945146c11b312 /lib/std/http/protocol.zig | |
| parent | 7230b68b350b16c637e84f3ff224be24d23214ce (diff) | |
| parent | 653d4158cdcb20be82ff525e122277064e6acb92 (diff) | |
| download | zig-cfce81f7d5f11ab93b2d5fd26df41edf967f333b.tar.gz zig-cfce81f7d5f11ab93b2d5fd26df41edf967f333b.zip | |
Merge pull request #18955 from ziglang/std.http.Server
take std.http in a different direction
Diffstat (limited to 'lib/std/http/protocol.zig')
| -rw-r--r-- | lib/std/http/protocol.zig | 632 |
1 files changed, 111 insertions, 521 deletions
diff --git a/lib/std/http/protocol.zig b/lib/std/http/protocol.zig index 0ccafd2ee5..78511f435d 100644 --- a/lib/std/http/protocol.zig +++ b/lib/std/http/protocol.zig @@ -7,15 +7,19 @@ const assert = std.debug.assert; const use_vectors = builtin.zig_backend != .stage2_x86_64; pub const State = enum { - /// Begin header parsing states. invalid, + + // Begin header and trailer parsing states. + start, seen_n, seen_r, seen_rn, seen_rnr, finished, - /// Begin transfer-encoding: chunked parsing states. + + // Begin transfer-encoding: chunked parsing states. + chunk_head_size, chunk_head_ext, chunk_head_r, @@ -34,484 +38,114 @@ pub const State = enum { pub const HeadersParser = struct { state: State = .start, - /// Whether or not `header_bytes` is allocated or was provided as a fixed buffer. - header_bytes_owned: bool, - /// Either a fixed buffer of len `max_header_bytes` or a dynamic buffer that can grow up to `max_header_bytes`. + /// A fixed buffer of len `max_header_bytes`. /// Pointers into this buffer are not stable until after a message is complete. - header_bytes: std.ArrayListUnmanaged(u8), - /// The maximum allowed size of `header_bytes`. - max_header_bytes: usize, - next_chunk_length: u64 = 0, - /// Whether this parser is done parsing a complete message. - /// A message is only done when the entire payload has been read. - done: bool = false, - - /// Initializes the parser with a dynamically growing header buffer of up to `max` bytes. - pub fn initDynamic(max: usize) HeadersParser { - return .{ - .header_bytes = .{}, - .max_header_bytes = max, - .header_bytes_owned = true, - }; - } + header_bytes_buffer: []u8, + header_bytes_len: u32, + next_chunk_length: u64, + /// `false`: headers. `true`: trailers. + done: bool, /// Initializes the parser with a provided buffer `buf`. - pub fn initStatic(buf: []u8) HeadersParser { + pub fn init(buf: []u8) HeadersParser { return .{ - .header_bytes = .{ .items = buf[0..0], .capacity = buf.len }, - .max_header_bytes = buf.len, - .header_bytes_owned = false, + .header_bytes_buffer = buf, + .header_bytes_len = 0, + .done = false, + .next_chunk_length = 0, }; } - /// Completely resets the parser to it's initial state. - /// This must be called after a message is complete. - pub fn reset(r: *HeadersParser) void { - assert(r.done); // The message must be completely read before reset, otherwise the parser is in an invalid state. - - r.header_bytes.clearRetainingCapacity(); - - r.* = .{ - .header_bytes = r.header_bytes, - .max_header_bytes = r.max_header_bytes, - .header_bytes_owned = r.header_bytes_owned, + /// Reinitialize the parser. + /// Asserts the parser is in the "done" state. + pub fn reset(hp: *HeadersParser) void { + assert(hp.done); + hp.* = .{ + .state = .start, + .header_bytes_buffer = hp.header_bytes_buffer, + .header_bytes_len = 0, + .done = false, + .next_chunk_length = 0, }; } - /// Returns the number of bytes consumed by headers. This is always less than or equal to `bytes.len`. - /// You should check `r.state.isContent()` after this to check if the headers are done. - /// - /// If the amount returned is less than `bytes.len`, you may assume that the parser is in a content state and the - /// first byte of content is located at `bytes[result]`. - pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 { - const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8); - const len: u32 = @intCast(bytes.len); - var index: u32 = 0; - - while (true) { - switch (r.state) { - .invalid => unreachable, - .finished => return index, - .start => switch (len - index) { - 0 => return index, - 1 => { - switch (bytes[index]) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_n, - else => {}, - } - - return index + 1; - }, - 2 => { - const b16 = int16(bytes[index..][0..2]); - const b8 = intShift(u8, b16); - - switch (b8) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_n, - else => {}, - } - - switch (b16) { - int16("\r\n") => r.state = .seen_rn, - int16("\n\n") => r.state = .finished, - else => {}, - } - - return index + 2; - }, - 3 => { - const b24 = int24(bytes[index..][0..3]); - const b16 = intShift(u16, b24); - const b8 = intShift(u8, b24); - - switch (b8) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_n, - else => {}, - } - - switch (b16) { - int16("\r\n") => r.state = .seen_rn, - int16("\n\n") => r.state = .finished, - else => {}, - } - - switch (b24) { - int24("\r\n\r") => r.state = .seen_rnr, - else => {}, - } - - return index + 3; - }, - 4...vector_len - 1 => { - const b32 = int32(bytes[index..][0..4]); - const b24 = intShift(u24, b32); - const b16 = intShift(u16, b32); - const b8 = intShift(u8, b32); - - switch (b8) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_n, - else => {}, - } - - switch (b16) { - int16("\r\n") => r.state = .seen_rn, - int16("\n\n") => r.state = .finished, - else => {}, - } - - switch (b24) { - int24("\r\n\r") => r.state = .seen_rnr, - else => {}, - } - - switch (b32) { - int32("\r\n\r\n") => r.state = .finished, - else => {}, - } - - index += 4; - continue; - }, - else => { - const chunk = bytes[index..][0..vector_len]; - const matches = if (use_vectors) matches: { - const Vector = @Vector(vector_len, u8); - // const BoolVector = @Vector(vector_len, bool); - const BitVector = @Vector(vector_len, u1); - const SizeVector = @Vector(vector_len, u8); - - const v: Vector = chunk.*; - const matches_r: BitVector = @bitCast(v == @as(Vector, @splat('\r'))); - const matches_n: BitVector = @bitCast(v == @as(Vector, @splat('\n'))); - const matches_or: SizeVector = matches_r | matches_n; - - break :matches @reduce(.Add, matches_or); - } else matches: { - var matches: u8 = 0; - for (chunk) |byte| switch (byte) { - '\r', '\n' => matches += 1, - else => {}, - }; - break :matches matches; - }; - switch (matches) { - 0 => {}, - 1 => switch (chunk[vector_len - 1]) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_n, - else => {}, - }, - 2 => { - const b16 = int16(chunk[vector_len - 2 ..][0..2]); - const b8 = intShift(u8, b16); - - switch (b8) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_n, - else => {}, - } - - switch (b16) { - int16("\r\n") => r.state = .seen_rn, - int16("\n\n") => r.state = .finished, - else => {}, - } - }, - 3 => { - const b24 = int24(chunk[vector_len - 3 ..][0..3]); - const b16 = intShift(u16, b24); - const b8 = intShift(u8, b24); - - switch (b8) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_n, - else => {}, - } - - switch (b16) { - int16("\r\n") => r.state = .seen_rn, - int16("\n\n") => r.state = .finished, - else => {}, - } - - switch (b24) { - int24("\r\n\r") => r.state = .seen_rnr, - else => {}, - } - }, - 4...vector_len => { - inline for (0..vector_len - 3) |i_usize| { - const i = @as(u32, @truncate(i_usize)); - - const b32 = int32(chunk[i..][0..4]); - const b16 = intShift(u16, b32); - - if (b32 == int32("\r\n\r\n")) { - r.state = .finished; - return index + i + 4; - } else if (b16 == int16("\n\n")) { - r.state = .finished; - return index + i + 2; - } - } - - const b24 = int24(chunk[vector_len - 3 ..][0..3]); - const b16 = intShift(u16, b24); - const b8 = intShift(u8, b24); - - switch (b8) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_n, - else => {}, - } - - switch (b16) { - int16("\r\n") => r.state = .seen_rn, - int16("\n\n") => r.state = .finished, - else => {}, - } - - switch (b24) { - int24("\r\n\r") => r.state = .seen_rnr, - else => {}, - } - }, - else => unreachable, - } - - index += vector_len; - continue; - }, - }, - .seen_n => switch (len - index) { - 0 => return index, - else => { - switch (bytes[index]) { - '\n' => r.state = .finished, - else => r.state = .start, - } - - index += 1; - continue; - }, - }, - .seen_r => switch (len - index) { - 0 => return index, - 1 => { - switch (bytes[index]) { - '\n' => r.state = .seen_rn, - '\r' => r.state = .seen_r, - else => r.state = .start, - } - - return index + 1; - }, - 2 => { - const b16 = int16(bytes[index..][0..2]); - const b8 = intShift(u8, b16); - - switch (b8) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_rn, - else => r.state = .start, - } - - switch (b16) { - int16("\r\n") => r.state = .seen_rn, - int16("\n\r") => r.state = .seen_rnr, - int16("\n\n") => r.state = .finished, - else => {}, - } - - return index + 2; - }, - else => { - const b24 = int24(bytes[index..][0..3]); - const b16 = intShift(u16, b24); - const b8 = intShift(u8, b24); - - switch (b8) { - '\r' => r.state = .seen_r, - '\n' => r.state = .seen_n, - else => r.state = .start, - } - - switch (b16) { - int16("\r\n") => r.state = .seen_rn, - int16("\n\n") => r.state = .finished, - else => {}, - } - - switch (b24) { - int24("\n\r\n") => r.state = .finished, - else => {}, - } - - index += 3; - continue; - }, - }, - .seen_rn => switch (len - index) { - 0 => return index, - 1 => { - switch (bytes[index]) { - '\r' => r.state = .seen_rnr, - '\n' => r.state = .seen_n, - else => r.state = .start, - } - - return index + 1; - }, - else => { - const b16 = int16(bytes[index..][0..2]); - const b8 = intShift(u8, b16); - - switch (b8) { - '\r' => r.state = .seen_rnr, - '\n' => r.state = .seen_n, - else => r.state = .start, - } - - switch (b16) { - int16("\r\n") => r.state = .finished, - int16("\n\n") => r.state = .finished, - else => {}, - } - - index += 2; - continue; - }, - }, - .seen_rnr => switch (len - index) { - 0 => return index, - else => { - switch (bytes[index]) { - '\n' => r.state = .finished, - else => r.state = .start, - } - - index += 1; - continue; - }, - }, - .chunk_head_size => unreachable, - .chunk_head_ext => unreachable, - .chunk_head_r => unreachable, - .chunk_data => unreachable, - .chunk_data_suffix => unreachable, - .chunk_data_suffix_r => unreachable, - } + pub fn get(hp: HeadersParser) []u8 { + return hp.header_bytes_buffer[0..hp.header_bytes_len]; + } - return index; - } + pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 { + var hp: std.http.HeadParser = .{ + .state = switch (r.state) { + .start => .start, + .seen_n => .seen_n, + .seen_r => .seen_r, + .seen_rn => .seen_rn, + .seen_rnr => .seen_rnr, + .finished => .finished, + else => unreachable, + }, + }; + const result = hp.feed(bytes); + r.state = switch (hp.state) { + .start => .start, + .seen_n => .seen_n, + .seen_r => .seen_r, + .seen_rn => .seen_rn, + .seen_rnr => .seen_rnr, + .finished => .finished, + }; + return @intCast(result); } - /// Returns the number of bytes consumed by the chunk size. This is always less than or equal to `bytes.len`. - /// You should check `r.state == .chunk_data` after this to check if the chunk size has been fully parsed. - /// - /// If the amount returned is less than `bytes.len`, you may assume that the parser is in the `chunk_data` state - /// and that the first byte of the chunk is at `bytes[result]`. pub fn findChunkedLen(r: *HeadersParser, bytes: []const u8) u32 { - const len = @as(u32, @intCast(bytes.len)); - - for (bytes[0..], 0..) |c, i| { - const index = @as(u32, @intCast(i)); - switch (r.state) { - .chunk_data_suffix => switch (c) { - '\r' => r.state = .chunk_data_suffix_r, - '\n' => r.state = .chunk_head_size, - else => { - r.state = .invalid; - return index; - }, - }, - .chunk_data_suffix_r => switch (c) { - '\n' => r.state = .chunk_head_size, - else => { - r.state = .invalid; - return index; - }, - }, - .chunk_head_size => { - const digit = switch (c) { - '0'...'9' => |b| b - '0', - 'A'...'Z' => |b| b - 'A' + 10, - 'a'...'z' => |b| b - 'a' + 10, - '\r' => { - r.state = .chunk_head_r; - continue; - }, - '\n' => { - r.state = .chunk_data; - return index + 1; - }, - else => { - r.state = .chunk_head_ext; - continue; - }, - }; - - const new_len = r.next_chunk_length *% 16 +% digit; - if (new_len <= r.next_chunk_length and r.next_chunk_length != 0) { - r.state = .invalid; - return index; - } - - r.next_chunk_length = new_len; - }, - .chunk_head_ext => switch (c) { - '\r' => r.state = .chunk_head_r, - '\n' => { - r.state = .chunk_data; - return index + 1; - }, - else => continue, - }, - .chunk_head_r => switch (c) { - '\n' => { - r.state = .chunk_data; - return index + 1; - }, - else => { - r.state = .invalid; - return index; - }, - }, + var cp: std.http.ChunkParser = .{ + .state = switch (r.state) { + .chunk_head_size => .head_size, + .chunk_head_ext => .head_ext, + .chunk_head_r => .head_r, + .chunk_data => .data, + .chunk_data_suffix => .data_suffix, + .chunk_data_suffix_r => .data_suffix_r, + .invalid => .invalid, else => unreachable, - } - } - - return len; + }, + .chunk_len = r.next_chunk_length, + }; + const result = cp.feed(bytes); + r.state = switch (cp.state) { + .head_size => .chunk_head_size, + .head_ext => .chunk_head_ext, + .head_r => .chunk_head_r, + .data => .chunk_data, + .data_suffix => .chunk_data_suffix, + .data_suffix_r => .chunk_data_suffix_r, + .invalid => .invalid, + }; + r.next_chunk_length = cp.chunk_len; + return @intCast(result); } - /// Returns whether or not the parser has finished parsing a complete message. A message is only complete after the - /// entire body has been read and any trailing headers have been parsed. + /// Returns whether or not the parser has finished parsing a complete + /// message. A message is only complete after the entire body has been read + /// and any trailing headers have been parsed. pub fn isComplete(r: *HeadersParser) bool { return r.done and r.state == .finished; } - pub const CheckCompleteHeadError = mem.Allocator.Error || error{HttpHeadersExceededSizeLimit}; + pub const CheckCompleteHeadError = error{HttpHeadersOversize}; - /// Pushes `in` into the parser. Returns the number of bytes consumed by the header. Any header bytes are appended - /// to the `header_bytes` buffer. - /// - /// This function only uses `allocator` if `r.header_bytes_owned` is true, and may be undefined otherwise. - pub fn checkCompleteHead(r: *HeadersParser, allocator: std.mem.Allocator, in: []const u8) CheckCompleteHeadError!u32 { - if (r.state.isContent()) return 0; + /// Pushes `in` into the parser. Returns the number of bytes consumed by + /// the header. Any header bytes are appended to `header_bytes_buffer`. + pub fn checkCompleteHead(hp: *HeadersParser, in: []const u8) CheckCompleteHeadError!u32 { + if (hp.state.isContent()) return 0; - const i = r.findHeadersEnd(in); + const i = hp.findHeadersEnd(in); const data = in[0..i]; - if (r.header_bytes.items.len + data.len > r.max_header_bytes) { - return error.HttpHeadersExceededSizeLimit; - } else { - if (r.header_bytes_owned) try r.header_bytes.ensureUnusedCapacity(allocator, data.len); + if (hp.header_bytes_len + data.len > hp.header_bytes_buffer.len) + return error.HttpHeadersOversize; - r.header_bytes.appendSliceAssumeCapacity(data); - } + @memcpy(hp.header_bytes_buffer[hp.header_bytes_len..][0..data.len], data); + hp.header_bytes_len += @intCast(data.len); return i; } @@ -520,7 +154,8 @@ pub const HeadersParser = struct { HttpChunkInvalid, }; - /// Reads the body of the message into `buffer`. Returns the number of bytes placed in the buffer. + /// Reads the body of the message into `buffer`. Returns the number of + /// bytes placed in the buffer. /// /// If `skip` is true, the buffer will be unused and the body will be skipped. /// @@ -571,9 +206,10 @@ pub const HeadersParser = struct { .chunk_data => if (r.next_chunk_length == 0) { if (std.mem.eql(u8, conn.peek(), "\r\n")) { r.state = .finished; - r.done = true; + conn.drop(2); } else { - // The trailer section is formatted identically to the header section. + // The trailer section is formatted identically + // to the header section. r.state = .seen_rn; } r.done = true; @@ -713,57 +349,11 @@ const MockBufferedConnection = struct { } }; -test "HeadersParser.findHeadersEnd" { - var r: HeadersParser = undefined; - const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\nHello"; - - for (0..36) |i| { - r = HeadersParser.initDynamic(0); - try std.testing.expectEqual(@as(u32, @intCast(i)), r.findHeadersEnd(data[0..i])); - try std.testing.expectEqual(@as(u32, @intCast(35 - i)), r.findHeadersEnd(data[i..])); - } -} - -test "HeadersParser.findChunkedLen" { - var r: HeadersParser = undefined; - const data = "Ff\r\nf0f000 ; ext\n0\r\nffffffffffffffffffffffffffffffffffffffff\r\n"; - - r = HeadersParser.initDynamic(0); - r.state = .chunk_head_size; - r.next_chunk_length = 0; - - const first = r.findChunkedLen(data[0..]); - try testing.expectEqual(@as(u32, 4), first); - try testing.expectEqual(@as(u64, 0xff), r.next_chunk_length); - try testing.expectEqual(State.chunk_data, r.state); - r.state = .chunk_head_size; - r.next_chunk_length = 0; - - const second = r.findChunkedLen(data[first..]); - try testing.expectEqual(@as(u32, 13), second); - try testing.expectEqual(@as(u64, 0xf0f000), r.next_chunk_length); - try testing.expectEqual(State.chunk_data, r.state); - r.state = .chunk_head_size; - r.next_chunk_length = 0; - - const third = r.findChunkedLen(data[first + second ..]); - try testing.expectEqual(@as(u32, 3), third); - try testing.expectEqual(@as(u64, 0), r.next_chunk_length); - try testing.expectEqual(State.chunk_data, r.state); - r.state = .chunk_head_size; - r.next_chunk_length = 0; - - const fourth = r.findChunkedLen(data[first + second + third ..]); - try testing.expectEqual(@as(u32, 16), fourth); - try testing.expectEqual(@as(u64, 0xffffffffffffffff), r.next_chunk_length); - try testing.expectEqual(State.invalid, r.state); -} - test "HeadersParser.read length" { // mock BufferedConnection for read + var headers_buf: [256]u8 = undefined; - var r = HeadersParser.initDynamic(256); - defer r.header_bytes.deinit(std.testing.allocator); + var r = HeadersParser.init(&headers_buf); const data = "GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\nHello"; var conn: MockBufferedConnection = .{ @@ -773,8 +363,8 @@ test "HeadersParser.read length" { while (true) { // read headers try conn.fill(); - const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek()); - conn.drop(@as(u16, @intCast(nchecked))); + const nchecked = try r.checkCompleteHead(conn.peek()); + conn.drop(@intCast(nchecked)); if (r.state.isContent()) break; } @@ -786,14 +376,14 @@ test "HeadersParser.read length" { try std.testing.expectEqual(@as(usize, 5), len); try std.testing.expectEqualStrings("Hello", buf[0..len]); - try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\n", r.header_bytes.items); + try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\n", r.get()); } test "HeadersParser.read chunked" { // mock BufferedConnection for read - var r = HeadersParser.initDynamic(256); - defer r.header_bytes.deinit(std.testing.allocator); + var headers_buf: [256]u8 = undefined; + var r = HeadersParser.init(&headers_buf); const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n2\r\nHe\r\n2\r\nll\r\n1\r\no\r\n0\r\n\r\n"; var conn: MockBufferedConnection = .{ @@ -803,8 +393,8 @@ test "HeadersParser.read chunked" { while (true) { // read headers try conn.fill(); - const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek()); - conn.drop(@as(u16, @intCast(nchecked))); + const nchecked = try r.checkCompleteHead(conn.peek()); + conn.drop(@intCast(nchecked)); if (r.state.isContent()) break; } @@ -815,14 +405,14 @@ test "HeadersParser.read chunked" { try std.testing.expectEqual(@as(usize, 5), len); try std.testing.expectEqualStrings("Hello", buf[0..len]); - try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\n", r.header_bytes.items); + try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\n", r.get()); } test "HeadersParser.read chunked trailer" { // mock BufferedConnection for read - var r = HeadersParser.initDynamic(256); - defer r.header_bytes.deinit(std.testing.allocator); + var headers_buf: [256]u8 = undefined; + var r = HeadersParser.init(&headers_buf); const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n2\r\nHe\r\n2\r\nll\r\n1\r\no\r\n0\r\nContent-Type: text/plain\r\n\r\n"; var conn: MockBufferedConnection = .{ @@ -832,8 +422,8 @@ test "HeadersParser.read chunked trailer" { while (true) { // read headers try conn.fill(); - const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek()); - conn.drop(@as(u16, @intCast(nchecked))); + const nchecked = try r.checkCompleteHead(conn.peek()); + conn.drop(@intCast(nchecked)); if (r.state.isContent()) break; } @@ -847,11 +437,11 @@ test "HeadersParser.read chunked trailer" { while (true) { // read headers try conn.fill(); - const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek()); - conn.drop(@as(u16, @intCast(nchecked))); + const nchecked = try r.checkCompleteHead(conn.peek()); + conn.drop(@intCast(nchecked)); if (r.state.isContent()) break; } - try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\nContent-Type: text/plain\r\n\r\n", r.header_bytes.items); + try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\nContent-Type: text/plain\r\n\r\n", r.get()); } |
