Merge branch 'master' into autodoc-searchkey

author: Loris Cro <kappaloris@gmail.com> 2023-06-18 09:06:40 +0200
committer: GitHub <noreply@github.com> 2023-06-18 09:06:40 +0200
commit: 216ef10dc471e4db60a30208be178d6c59efeaaf (patch)
tree: 8c239dab283ae9cb3b7fe099bae240bcc53f894e /lib/std/json/scanner.zig
parent: 0fc1d396495c1ab482197021dedac8bea3f9401c (diff)
parent: 729a051e9e38674233190aea23c0ac8c134f2d67 (diff)
download: zig-216ef10dc471e4db60a30208be178d6c59efeaaf.tar.gz
zig-216ef10dc471e4db60a30208be178d6c59efeaaf.zip
1 files changed, 1764 insertions, 0 deletions
diff --git a/lib/std/json/scanner.zig b/lib/std/json/scanner.zig
new file mode 100644
index 0000000000..4fb7c1da01
--- /dev/null
+++ b/lib/std/json/scanner.zig
@@ -0,0 +1,1764 @@
+// Notes on standards compliance: https://datatracker.ietf.org/doc/html/rfc8259
+// * RFC 8259 requires JSON documents be valid UTF-8,
+//   but makes an allowance for systems that are "part of a closed ecosystem".
+//   I have no idea what that's supposed to mean in the context of a standard specification.
+//   This implementation requires inputs to be valid UTF-8.
+// * RFC 8259 contradicts itself regarding whether lowercase is allowed in \u hex digits,
+//   but this is probably a bug in the spec, and it's clear that lowercase is meant to be allowed.
+//   (RFC 5234 defines HEXDIG to only allow uppercase.)
+// * When RFC 8259 refers to a "character", I assume they really mean a "Unicode scalar value".
+//   See http://www.unicode.org/glossary/#unicode_scalar_value .
+// * RFC 8259 doesn't explicitly disallow unpaired surrogate halves in \u escape sequences,
+//   but vaguely implies that \u escapes are for encoding Unicode "characters" (i.e. Unicode scalar values?),
+//   which would mean that unpaired surrogate halves are forbidden.
+//   By contrast ECMA-404 (a competing(/compatible?) JSON standard, which JavaScript's JSON.parse() conforms to)
+//   explicitly allows unpaired surrogate halves.
+//   This implementation forbids unpaired surrogate halves in \u sequences.
+//   If a high surrogate half appears in a \u sequence,
+//   then a low surrogate half must immediately follow in \u notation.
+// * RFC 8259 allows implementations to "accept non-JSON forms or extensions".
+//   This implementation does not accept any of that.
+// * RFC 8259 allows implementations to put limits on "the size of texts",
+//   "the maximum depth of nesting", "the range and precision of numbers",
+//   and "the length and character contents of strings".
+//   This low-level implementation does not limit these,
+//   except where noted above, and except that nesting depth requires memory allocation.
+//   Note that this low-level API does not interpret numbers numerically,
+//   but simply emits their source form for some higher level code to make sense of.
+// * This low-level implementation allows duplicate object keys,
+//   and key/value pairs are emitted in the order they appear in the input.
+
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+const assert = std.debug.assert;
+
+/// Checks whether `s` is a well-formed JSON document by scanning it to the end.
+/// Returns `false` when the scanner reports `SyntaxError` or `UnexpectedEndOfInput`.
+/// Allocator errors are passed through unchanged; they are unlikely,
+/// but extreme nesting depth in the input can trigger them.
+pub fn validate(allocator: Allocator, s: []const u8) Allocator.Error!bool {
+    var scanner = Scanner.initCompleteInput(allocator, s);
+    defer scanner.deinit();
+
+    while (true) {
+        if (scanner.next()) |token| {
+            if (token == .end_of_document) return true;
+        } else |err| switch (err) {
+            error.OutOfMemory => return error.OutOfMemory,
+            error.SyntaxError, error.UnexpectedEndOfInput => return false,
+            // The scanner was initialized with the complete input, so it never asks for more.
+            error.BufferUnderrun => unreachable,
+        }
+    }
+}
+
+/// Parsing failures fall into two categories:
+///  * `SyntaxError` means the document is clearly malformed,
+///    for example when the input isn't JSON at all.
+///  * `UnexpectedEndOfInput` means that everything has been
+///    valid so far, but the input appears to be truncated.
+/// Note that a completely empty (or whitespace-only) input will give `UnexpectedEndOfInput`.
+pub const Error = error{ SyntaxError, UnexpectedEndOfInput };
+
+/// Convenience constructor: instantiates `std.json.Reader` with `std.json.default_buffer_size`
+/// for the type of the given `io_reader` and initializes it.
+pub fn reader(allocator: Allocator, io_reader: anytype) Reader(default_buffer_size, @TypeOf(io_reader)) {
+    const JsonReader = Reader(default_buffer_size, @TypeOf(io_reader));
+    return JsonReader.init(allocator, io_reader);
+}
+/// Input buffer size in bytes (0x1000 = 4096) used by `json.reader`.
+pub const default_buffer_size = 0x1000;
+
+/// The tokens emitted by `std.json.Scanner` and `std.json.Reader` `.next*()` functions follow this grammar:
+/// ```
+///  <document> = <value> .end_of_document
+///  <value> =
+///    | <object>
+///    | <array>
+///    | <number>
+///    | <string>
+///    | .true
+///    | .false
+///    | .null
+///  <object> = .object_begin ( <string> <value> )* .object_end
+///  <array> = .array_begin ( <value> )* .array_end
+///  <number> = <It depends. See below.>
+///  <string> = <It depends. See below.>
+/// ```
+///
+/// What you get for `<number>` and `<string>` values depends on which `next*()` method you call:
+///
+/// ```
+/// next():
+///  <number> = ( .partial_number )* .number
+///  <string> = ( <partial_string> )* .string
+///  <partial_string> =
+///    | .partial_string
+///    | .partial_string_escaped_1
+///    | .partial_string_escaped_2
+///    | .partial_string_escaped_3
+///    | .partial_string_escaped_4
+///
+/// nextAlloc*(..., .alloc_always):
+///  <number> = .allocated_number
+///  <string> = .allocated_string
+///
+/// nextAlloc*(..., .alloc_if_needed):
+///  <number> =
+///    | .number
+///    | .allocated_number
+///  <string> =
+///    | .string
+///    | .allocated_string
+/// ```
+///
+/// For all tokens with a `[]const u8`, `[]u8`, or `[n]u8` payload, the payload represents the content of the value.
+/// For number values, this is the representation of the number exactly as it appears in the input.
+/// For strings, this is the content of the string after resolving escape sequences.
+///
+/// For `.allocated_number` and `.allocated_string`, the `[]u8` payloads are allocations made with the given allocator.
+/// You are responsible for managing that memory. `json.Reader.deinit()` does *not* free those allocations.
+///
+/// The `.partial_*` tokens indicate that a value spans multiple input buffers or that a string contains escape sequences.
+/// To get a complete value in memory, you need to concatenate the values yourself.
+/// Calling `nextAlloc*()` does this for you, and returns an `.allocated_*` token with the result.
+///
+/// For tokens with a `[]const u8` payload, the payload is a slice into the current input buffer.
+/// The memory may become undefined during the next call to `json.Scanner.feedInput()`
+/// or any `json.Reader` method whose return error set includes `json.Error`.
+/// To keep the value persistently, it is recommended to make a copy or to use `.alloc_always`,
+/// which makes a copy for you.
+///
+/// Note that `.number` and `.string` tokens that follow `.partial_*` tokens may have `0` length to indicate that
+/// the previously partial value is completed with no additional bytes.
+/// (This can happen when the break between input buffers happens to land on the exact end of a value. E.g. `"[1234"`, `"]"`.)
+/// `.partial_*` tokens never have `0` length.
+///
+/// The recommended strategy for using the different `next*()` methods is something like this:
+///
+/// When you're expecting an object key, use `.alloc_if_needed`.
+/// You often don't need a copy of the key string to persist; you might just check which field it is.
+/// In the case that the key happens to require an allocation, free it immediately after checking it.
+///
+/// When you're expecting a meaningful string value (such as on the right of a `:`),
+/// use `.alloc_always` in order to keep the value valid throughout parsing the rest of the document.
+///
+/// When you're expecting a number value, use `.alloc_if_needed`.
+/// You're probably going to be parsing the string representation of the number into a numeric representation,
+/// so you need the complete string representation only temporarily.
+///
+/// When you're skipping an unrecognized value, use `skipValue()`.
+pub const Token = union(enum) {
+    object_begin,
+    object_end,
+    array_begin,
+    array_end,
+
+    true,
+    false,
+    null,
+
+    number: []const u8, // Complete number, or the final piece after `.partial_number` tokens (may then be empty).
+    partial_number: []const u8, // Non-empty piece of a number that continues in a later token.
+    allocated_number: []u8, // Whole number copied with the given allocator; the caller must free it.
+
+    string: []const u8, // Complete string content, or the final piece after `.partial_*` tokens (may then be empty).
+    partial_string: []const u8, // Non-empty piece of string content that continues in a later token.
+    partial_string_escaped_1: [1]u8, // 1 byte of string content held by value; presumably a resolved escape sequence.
+    partial_string_escaped_2: [2]u8, // 2 bytes of string content held by value; presumably a resolved escape sequence.
+    partial_string_escaped_3: [3]u8, // 3 bytes of string content held by value; presumably a resolved escape sequence.
+    partial_string_escaped_4: [4]u8, // 4 bytes of string content held by value; presumably a resolved escape sequence.
+    allocated_string: []u8, // Whole string content copied with the given allocator; the caller must free it.
+
+    end_of_document,
+};
+
+/// Coarse category of the next token that a `next*()` call will emit, decided from that token's first byte; this is only used in `peekNextTokenType()`.
+pub const TokenType = enum {
+    object_begin,
+    object_end,
+    array_begin,
+    array_end,
+    true,
+    false,
+    null,
+    number,
+    string,
+    end_of_document,
+};
+
+/// Optional position tracking, useful for reporting where an error occurred.
+/// To enable it, declare `var diagnostics = Diagnostics{};` then call `source.enableDiagnostics(&diagnostics);`
+/// where `source` is either a `std.json.Reader` or a `std.json.Scanner` that has just been initialized.
+/// At any time, notably just after an error, call `getLine()`, `getColumn()`, and/or `getByteOffset()`
+/// to get meaningful information from this.
+pub const Diagnostics = struct {
+    line_number: u64 = 1,
+    /// Starts at the all-ones value, i.e. just "before" the input buffer,
+    /// so that the wrapping subtraction in `getColumn()` gives a 1-based column for line 1.
+    line_start_cursor: usize = std.math.maxInt(usize),
+    total_bytes_before_current_input: u64 = 0,
+    cursor_pointer: *const usize = undefined,
+
+    /// The current line number. Starts at 1.
+    pub fn getLine(self: *const Diagnostics) u64 {
+        const line = self.line_number;
+        return line;
+    }
+    /// The current column, measured from the start of the current line. Starts at 1.
+    pub fn getColumn(self: *const Diagnostics) u64 {
+        const cursor = self.cursor_pointer.*;
+        return cursor -% self.line_start_cursor;
+    }
+    /// The byte offset of the cursor since the start of the input. Starts at 0.
+    pub fn getByteOffset(self: *const Diagnostics) u64 {
+        const cursor = self.cursor_pointer.*;
+        return self.total_bytes_before_current_input + cursor;
+    }
+};
+
+/// Selects whether `nextAlloc*()` always copies a value or only when needed; see the documentation for `std.json.Token`.
+pub const AllocWhen = enum { alloc_if_needed, alloc_always };
+
+/// For security, the maximum size allocated to store a single string or number value is limited to 4MiB by default.
+/// A different limit can be specified by calling `nextAllocMax()` instead of `nextAlloc()`.
+pub const default_max_value_len = 4 * 1024 * 1024;
+
+/// Connects a `std.io.Reader` to a `std.json.Scanner`.
+/// Whenever the scanner reports `error.BufferUnderrun`, the `next*()` methods here read more bytes
+/// from the reader into the internal buffer, hand them to the scanner, and retry.
+pub fn Reader(comptime buffer_size: usize, comptime ReaderType: type) type {
+    return struct {
+        scanner: Scanner,
+        reader: ReaderType,
+
+        buffer: [buffer_size]u8 = undefined,
+
+        const Self = @This();
+
+        /// The allocator is only used to track `[]` and `{}` nesting levels.
+        pub fn init(allocator: Allocator, io_reader: ReaderType) Self {
+            const streaming_scanner = Scanner.initStreaming(allocator);
+            return .{ .scanner = streaming_scanner, .reader = io_reader };
+        }
+        /// Deinitializes the scanner and marks this reader as undefined.
+        pub fn deinit(self: *Self) void {
+            self.scanner.deinit();
+            self.* = undefined;
+        }
+
+        /// Calls `std.json.Scanner.enableDiagnostics`.
+        pub fn enableDiagnostics(self: *Self, diagnostics: *Diagnostics) void {
+            const json_scanner = &self.scanner;
+            json_scanner.enableDiagnostics(diagnostics);
+        }
+
+        pub const NextError = ReaderType.Error || Error || Allocator.Error;
+        pub const SkipError = NextError;
+        pub const AllocError = NextError || error{ValueTooLong};
+        pub const PeekError = ReaderType.Error || Error;
+
+        /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
+        /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+        pub fn nextAlloc(self: *Self, allocator: Allocator, when: AllocWhen) AllocError!Token {
+            const limit: usize = default_max_value_len;
+            return self.nextAllocMax(allocator, when, limit);
+        }
+        /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+        pub fn nextAllocMax(self: *Self, allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
+            const is_number = switch (try self.peekNextTokenType()) {
+                .number => true,
+                .string => false,
+
+                // Simple tokens never alloc.
+                .object_begin,
+                .object_end,
+                .array_begin,
+                .array_end,
+                .true,
+                .false,
+                .null,
+                .end_of_document,
+                => return try self.next(),
+            };
+
+            var value_list = ArrayList(u8).init(allocator);
+            errdefer value_list.deinit();
+
+            if (try self.allocNextIntoArrayListMax(&value_list, when, max_value_len)) |slice| {
+                // The value was returned directly as a slice; nothing was written to `value_list`.
+                return if (is_number) Token{ .number = slice } else Token{ .string = slice };
+            }
+            const owned = try value_list.toOwnedSlice();
+            return if (is_number) Token{ .allocated_number = owned } else Token{ .allocated_string = owned };
+        }
+
+        /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
+        pub fn allocNextIntoArrayList(self: *Self, value_list: *ArrayList(u8), when: AllocWhen) AllocError!?[]const u8 {
+            const limit: usize = default_max_value_len;
+            return self.allocNextIntoArrayListMax(value_list, when, limit);
+        }
+        /// Calls `std.json.Scanner.allocNextIntoArrayListMax` and handles `error.BufferUnderrun`.
+        pub fn allocNextIntoArrayListMax(self: *Self, value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocError!?[]const u8 {
+            while (true) {
+                if (self.scanner.allocNextIntoArrayListMax(value_list, when, max_value_len)) |maybe_slice| {
+                    return maybe_slice;
+                } else |err| switch (err) {
+                    // Partial content stays in `value_list`; fetch more input and resume.
+                    error.BufferUnderrun => try self.refillBuffer(),
+                    else => |other_err| return other_err,
+                }
+            }
+        }
+
+        /// Like `std.json.Scanner.skipValue`, but handles `error.BufferUnderrun`.
+        pub fn skipValue(self: *Self) SkipError!void {
+            const upcoming = try self.peekNextTokenType();
+            switch (upcoming) {
+                .object_begin, .array_begin => {
+                    const outer_height = self.stackHeight();
+                    try self.skipUntilStackHeight(outer_height);
+                },
+                .number, .string => {
+                    // Consume partial tokens until the terminating `.number` or `.string` arrives.
+                    var done = false;
+                    while (!done) {
+                        done = switch (try self.next()) {
+                            .partial_number,
+                            .partial_string,
+                            .partial_string_escaped_1,
+                            .partial_string_escaped_2,
+                            .partial_string_escaped_3,
+                            .partial_string_escaped_4,
+                            => false,
+
+                            .number, .string => true,
+
+                            else => unreachable,
+                        };
+                    }
+                },
+                .true, .false, .null => {
+                    _ = try self.next();
+                },
+
+                .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
+            }
+        }
+        /// Like `std.json.Scanner.skipUntilStackHeight()` but handles `error.BufferUnderrun`.
+        pub fn skipUntilStackHeight(self: *Self, terminal_stack_height: u32) NextError!void {
+            while (true) {
+                self.scanner.skipUntilStackHeight(terminal_stack_height) catch |err| switch (err) {
+                    error.BufferUnderrun => {
+                        try self.refillBuffer();
+                        continue;
+                    },
+                    else => |other_err| return other_err,
+                };
+                return;
+            }
+        }
+
+        /// Calls `std.json.Scanner.stackHeight`.
+        pub fn stackHeight(self: *const Self) u32 {
+            const height = self.scanner.stackHeight();
+            return height;
+        }
+        /// Calls `std.json.Scanner.ensureTotalStackCapacity`.
+        pub fn ensureTotalStackCapacity(self: *Self, height: u32) Allocator.Error!void {
+            return self.scanner.ensureTotalStackCapacity(height);
+        }
+
+        /// See `std.json.Token` for documentation of this function.
+        pub fn next(self: *Self) NextError!Token {
+            while (true) {
+                if (self.scanner.next()) |token| {
+                    return token;
+                } else |err| switch (err) {
+                    error.BufferUnderrun => try self.refillBuffer(),
+                    else => |other_err| return other_err,
+                }
+            }
+        }
+
+        /// See `std.json.Scanner.peekNextTokenType()`.
+        pub fn peekNextTokenType(self: *Self) PeekError!TokenType {
+            while (true) {
+                if (self.scanner.peekNextTokenType()) |token_type| {
+                    return token_type;
+                } else |err| switch (err) {
+                    error.BufferUnderrun => try self.refillBuffer(),
+                    else => |other_err| return other_err,
+                }
+            }
+        }
+
+        /// Reads from the underlying reader into `buffer` and feeds the bytes to the scanner.
+        /// A read of zero bytes is reported to the scanner as the end of the input.
+        fn refillBuffer(self: *Self) ReaderType.Error!void {
+            const bytes_read = try self.reader.read(self.buffer[0..]);
+            if (bytes_read == 0) {
+                self.scanner.endInput();
+                return;
+            }
+            self.scanner.feedInput(self.buffer[0..bytes_read]);
+        }
+    };
+}
+
+/// The lowest level parsing API in this package;
+/// supports streaming input with a low memory footprint.
+/// The memory requirement is `O(d)` where d is the nesting depth of `[]` or `{}` containers in the input.
+/// Specifically `d/8` bytes are required for this purpose,
+/// with some extra buffer according to the implementation of `std.ArrayList`.
+///
+/// This scanner can emit partial tokens; see `std.json.Token`.
+/// The input to this class is a sequence of input buffers that you must supply one at a time.
+/// Call `feedInput()` with the first buffer, then call `next()` repeatedly until `error.BufferUnderrun` is returned.
+/// Then call `feedInput()` again and so forth.
+/// Call `endInput()` when the last input buffer has been given to `feedInput()`, either immediately after calling `feedInput()`,
+/// or when `error.BufferUnderrun` requests more data and there is no more.
+/// Be sure to call `next()` after calling `endInput()` until `Token.end_of_document` has been returned.
+pub const Scanner = struct {
+    state: State = .value,
+    string_is_object_key: bool = false,
+    stack: BitStack,
+    value_start: usize = undefined,
+    unicode_code_point: u21 = undefined,
+
+    input: []const u8 = "",
+    cursor: usize = 0,
+    is_end_of_input: bool = false,
+    diagnostics: ?*Diagnostics = null,
+
+    /// Creates a scanner with no input yet; supply input with `feedInput()`.
+    /// The allocator is only used to track `[]` and `{}` nesting levels.
+    pub fn initStreaming(allocator: Allocator) @This() {
+        const nesting_stack = BitStack.init(allocator);
+        return .{ .stack = nesting_stack };
+    }
+    /// Use this if your input is a single slice.
+    /// This is effectively equivalent to:
+    /// ```
+    /// initStreaming(allocator);
+    /// feedInput(complete_input);
+    /// endInput();
+    /// ```
+    pub fn initCompleteInput(allocator: Allocator, complete_input: []const u8) @This() {
+        var complete = initStreaming(allocator);
+        // The whole document is already in hand, so no further input will be requested.
+        complete.input = complete_input;
+        complete.is_end_of_input = true;
+        return complete;
+    }
+    pub fn deinit(self: *@This()) void {
+        self.stack.deinit(); // Release the nesting-level stack.
+        self.* = undefined; // The scanner must not be used after this point.
+    }
+
+    /// Attaches `diagnostics` to this scanner.
+    /// The diagnostics object keeps a pointer to this scanner's `cursor` field.
+    pub fn enableDiagnostics(self: *@This(), diagnostics: *Diagnostics) void {
+        self.diagnostics = diagnostics;
+        diagnostics.cursor_pointer = &self.cursor;
+    }
+
+    /// Supplies the next input buffer. Call this whenever you get `error.BufferUnderrun` from `next()`.
+    /// The previous input slice must have been consumed completely.
+    /// When there is no more input to provide, call `endInput()`.
+    pub fn feedInput(self: *@This(), input: []const u8) void {
+        const finished_len = self.input.len;
+        assert(self.cursor == finished_len); // Not done with the last input slice.
+
+        if (self.diagnostics) |diag| {
+            diag.total_bytes_before_current_input += finished_len;
+            // This usually goes "negative" to measure how far before the beginning
+            // of the new buffer the current line started.
+            diag.line_start_cursor -%= self.cursor;
+        }
+
+        self.cursor = 0;
+        self.value_start = 0;
+        self.input = input;
+    }
+    /// Call this once there will be no further calls to `feedInput()`.
+    /// This can be called either immediately after the last `feedInput()`,
+    /// or at any time afterward, such as when getting `error.BufferUnderrun` from `next()`.
+    /// Don't forget to call `next*()` after `endInput()` until you get `.end_of_document`.
+    pub fn endInput(self: *@This()) void {
+        self.is_end_of_input = true;
+    }
+
+    pub const NextError = Error || Allocator.Error || error{BufferUnderrun}; // Error set of `next()` and `skipUntilStackHeight()`.
+    pub const AllocError = Error || Allocator.Error || error{ValueTooLong}; // Error set of `nextAlloc()` and `nextAllocMax()`.
+    pub const PeekError = Error || error{BufferUnderrun}; // Presumably the error set of `peekNextTokenType()`; confirm at its definition.
+    pub const SkipError = Error || Allocator.Error; // Error set of `skipValue()`.
+    pub const AllocIntoArrayListError = AllocError || error{BufferUnderrun}; // Error set of `allocNextIntoArrayList*()`.
+
+    /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
+    /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+    /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+    pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token {
+        const limit: usize = default_max_value_len;
+        return self.nextAllocMax(allocator, when, limit);
+    }
+
+    /// Returns the next token, with number and string values delivered whole
+    /// (as a slice of the input, or as an allocation of at most `max_value_len` bytes).
+    /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+    /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
+    pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
+        assert(self.is_end_of_input); // This function is not available in streaming mode.
+        const token_type = self.peekNextTokenType() catch |e| switch (e) {
+            error.BufferUnderrun => unreachable,
+            else => |err| return err,
+        };
+        const is_number = switch (token_type) {
+            .number => true,
+            .string => false,
+
+            // Simple tokens never alloc.
+            .object_begin,
+            .object_end,
+            .array_begin,
+            .array_end,
+            .true,
+            .false,
+            .null,
+            .end_of_document,
+            => return self.next() catch |e| switch (e) {
+                error.BufferUnderrun => unreachable,
+                else => |err| return err,
+            },
+        };
+
+        var value_list = ArrayList(u8).init(allocator);
+        errdefer value_list.deinit();
+
+        const maybe_slice = self.allocNextIntoArrayListMax(&value_list, when, max_value_len) catch |e| switch (e) {
+            error.BufferUnderrun => unreachable,
+            else => |err| return err,
+        };
+        if (maybe_slice) |slice| {
+            // The value was returned directly as a slice; nothing was written to `value_list`.
+            return if (is_number) Token{ .number = slice } else Token{ .string = slice };
+        }
+        const owned = try value_list.toOwnedSlice();
+        return if (is_number) Token{ .allocated_number = owned } else Token{ .allocated_string = owned };
+    }
+
+    /// Equivalent to `allocNextIntoArrayListMax(value_list, when, default_max_value_len);`
+    pub fn allocNextIntoArrayList(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen) AllocIntoArrayListError!?[]const u8 {
+        const limit: usize = default_max_value_len;
+        return self.allocNextIntoArrayListMax(value_list, when, limit);
+    }
+    /// Reads one complete number or string value.
+    /// The next token type must be either `.number` or `.string`. See `peekNextTokenType()`.
+    /// With `.alloc_if_needed`, when the value arrives as a single token and `value_list` is empty,
+    /// this method returns the content slice from the input buffer, and `value_list` is not touched.
+    /// Otherwise (or with `.alloc_always`) the partial tokens are concatenated into the given `value_list`,
+    /// and `null` is returned once the final `.number` or `.string` token has been written into it.
+    /// In case of an `error.BufferUnderrun`, partial values will be left in the given value_list.
+    /// The given `value_list` is never reset by this method, so an `error.BufferUnderrun` situation
+    /// can be resumed by passing the same array list in again.
+    /// This method does not indicate whether the token content being returned is for a `.number` or `.string` token type;
+    /// the caller of this method is expected to know which type of token is being processed.
+    pub fn allocNextIntoArrayListMax(self: *@This(), value_list: *ArrayList(u8), when: AllocWhen, max_value_len: usize) AllocIntoArrayListError!?[]const u8 {
+        while (true) {
+            switch (try self.next()) {
+                // Pieces of an unfinished value: accumulate them and keep scanning.
+                .partial_number, .partial_string => |piece| try appendSlice(value_list, piece, max_value_len),
+                inline .partial_string_escaped_1,
+                .partial_string_escaped_2,
+                .partial_string_escaped_3,
+                .partial_string_escaped_4,
+                => |bytes| try appendSlice(value_list, &bytes, max_value_len),
+
+                // The final piece of the value.
+                .number, .string => |tail| {
+                    const nothing_accumulated = value_list.items.len == 0;
+                    if (when == .alloc_if_needed and nothing_accumulated) {
+                        // No alloc necessary.
+                        return tail;
+                    }
+                    try appendSlice(value_list, tail, max_value_len);
+                    // The token is complete.
+                    return null;
+                },
+
+                // Only .number and .string token types are allowed here. Check peekNextTokenType() before calling this.
+                .object_begin,
+                .object_end,
+                .array_begin,
+                .array_end,
+                .true,
+                .false,
+                .null,
+                .end_of_document,
+                => unreachable,
+
+                .allocated_number, .allocated_string => unreachable,
+            }
+        }
+    }
+
+    /// Consumes exactly one value without returning it.
+    /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
+    /// If the next token type is `.object_begin` or `.array_begin`,
+    /// this function calls `next()` repeatedly until the corresponding `.object_end` or `.array_end` is found.
+    /// If the next token type is `.number` or `.string`,
+    /// this function calls `next()` repeatedly until the (non `.partial_*`) `.number` or `.string` token is found.
+    /// If the next token type is `.true`, `.false`, or `.null`, this function calls `next()` once.
+    /// The next token type must not be `.object_end`, `.array_end`, or `.end_of_document`;
+    /// see `peekNextTokenType()`.
+    pub fn skipValue(self: *@This()) SkipError!void {
+        assert(self.is_end_of_input); // This function is not available in streaming mode.
+        const upcoming = self.peekNextTokenType() catch |e| switch (e) {
+            error.BufferUnderrun => unreachable,
+            else => |err| return err,
+        };
+        switch (upcoming) {
+            .object_begin, .array_begin => {
+                const outer_height = self.stackHeight();
+                self.skipUntilStackHeight(outer_height) catch |e| switch (e) {
+                    error.BufferUnderrun => unreachable,
+                    else => |err| return err,
+                };
+            },
+            .number, .string => {
+                // Consume partial tokens until the terminating `.number` or `.string` arrives.
+                var done = false;
+                while (!done) {
+                    const token = self.next() catch |e| switch (e) {
+                        error.BufferUnderrun => unreachable,
+                        else => |err| return err,
+                    };
+                    done = switch (token) {
+                        .partial_number,
+                        .partial_string,
+                        .partial_string_escaped_1,
+                        .partial_string_escaped_2,
+                        .partial_string_escaped_3,
+                        .partial_string_escaped_4,
+                        => false,
+
+                        .number, .string => true,
+
+                        else => unreachable,
+                    };
+                }
+            },
+            .true, .false, .null => {
+                _ = self.next() catch |e| switch (e) {
+                    error.BufferUnderrun => unreachable,
+                    else => |err| return err,
+                };
+            },
+
+            .object_end, .array_end, .end_of_document => unreachable, // Attempt to skip a non-value token.
+        }
+    }
+
+    /// Skip tokens until an `.object_end` or `.array_end` token results in a `stackHeight()` equal to the given stack height.
+    /// Unlike `skipValue()`, this function is available in streaming mode.
+    pub fn skipUntilStackHeight(self: *@This(), terminal_stack_height: u32) NextError!void {
+        while (true) {
+            const token = try self.next();
+            switch (token) {
+                .end_of_document => unreachable,
+                .object_end, .array_end => {
+                    // A container just closed; stop once we are back at the requested depth.
+                    if (self.stackHeight() == terminal_stack_height) return;
+                },
+                else => {},
+            }
+        }
+    }
+
+    /// Returns the current depth of `{}` or `[]` nesting, i.e. the number of entries on the nesting stack.
+    pub fn stackHeight(self: *const @This()) u32 {
+        return self.stack.bit_len;
+    }
+
+    /// Reserves memory up front for the given number of nesting levels.
+    /// Reaching a `stackHeight()` up to the given number will then not cause allocations.
+    pub fn ensureTotalStackCapacity(self: *@This(), height: u32) Allocator.Error!void {
+        return self.stack.ensureTotalCapacity(height);
+    }
+
+    /// See `std.json.Token` for documentation of this function.
+    pub fn next(self: *@This()) NextError!Token {
+        state_loop: while (true) {
+            switch (self.state) {
+                .value => {
+                    switch (try self.skipWhitespaceExpectByte()) {
+                        // Object, Array
+                        '{' => {
+                            try self.stack.push(OBJECT_MODE);
+                            self.cursor += 1;
+                            self.state = .object_start;
+                            return .object_begin;
+                        },
+                        '[' => {
+                            try self.stack.push(ARRAY_MODE);
+                            self.cursor += 1;
+                            self.state = .array_start;
+                            return .array_begin;
+                        },
+
+                        // String
+                        '"' => {
+                            self.cursor += 1;
+                            self.value_start = self.cursor;
+                            self.state = .string;
+                            continue :state_loop;
+                        },
+
+                        // Number
+                        '1'...'9' => {
+                            self.value_start = self.cursor;
+                            self.cursor += 1;
+                            self.state = .number_int;
+                            continue :state_loop;
+                        },
+                        '0' => {
+                            self.value_start = self.cursor;
+                            self.cursor += 1;
+                            self.state = .number_leading_zero;
+                            continue :state_loop;
+                        },
+                        '-' => {
+                            self.value_start = self.cursor;
+                            self.cursor += 1;
+                            self.state = .number_minus;
+                            continue :state_loop;
+                        },
+
+                        // literal values
+                        't' => {
+                            self.cursor += 1;
+                            self.state = .literal_t;
+                            continue :state_loop;
+                        },
+                        'f' => {
+                            self.cursor += 1;
+                            self.state = .literal_f;
+                            continue :state_loop;
+                        },
+                        'n' => {
+                            self.cursor += 1;
+                            self.state = .literal_n;
+                            continue :state_loop;
+                        },
+
+                        else => return error.SyntaxError,
+                    }
+                },
+
+                .post_value => {
+                    if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
+
+                    const c = self.input[self.cursor];
+                    if (self.string_is_object_key) {
+                        self.string_is_object_key = false;
+                        switch (c) {
+                            ':' => {
+                                self.cursor += 1;
+                                self.state = .value;
+                                continue :state_loop;
+                            },
+                            else => return error.SyntaxError,
+                        }
+                    }
+
+                    switch (c) {
+                        '}' => {
+                            if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError;
+                            self.cursor += 1;
+                            // stay in .post_value state.
+                            return .object_end;
+                        },
+                        ']' => {
+                            if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError;
+                            self.cursor += 1;
+                            // stay in .post_value state.
+                            return .array_end;
+                        },
+                        ',' => {
+                            switch (self.stack.peek()) {
+                                OBJECT_MODE => {
+                                    self.state = .object_post_comma;
+                                },
+                                ARRAY_MODE => {
+                                    self.state = .value;
+                                },
+                            }
+                            self.cursor += 1;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+
+                .object_start => {
+                    switch (try self.skipWhitespaceExpectByte()) {
+                        '"' => {
+                            self.cursor += 1;
+                            self.value_start = self.cursor;
+                            self.state = .string;
+                            self.string_is_object_key = true;
+                            continue :state_loop;
+                        },
+                        '}' => {
+                            self.cursor += 1;
+                            _ = self.stack.pop();
+                            self.state = .post_value;
+                            return .object_end;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .object_post_comma => {
+                    switch (try self.skipWhitespaceExpectByte()) {
+                        '"' => {
+                            self.cursor += 1;
+                            self.value_start = self.cursor;
+                            self.state = .string;
+                            self.string_is_object_key = true;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+
+                .array_start => {
+                    switch (try self.skipWhitespaceExpectByte()) {
+                        ']' => {
+                            self.cursor += 1;
+                            _ = self.stack.pop();
+                            self.state = .post_value;
+                            return .array_end;
+                        },
+                        else => {
+                            self.state = .value;
+                            continue :state_loop;
+                        },
+                    }
+                },
+
+                .number_minus => {
+                    if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+                    switch (self.input[self.cursor]) {
+                        '0' => {
+                            self.cursor += 1;
+                            self.state = .number_leading_zero;
+                            continue :state_loop;
+                        },
+                        '1'...'9' => {
+                            self.cursor += 1;
+                            self.state = .number_int;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .number_leading_zero => {
+                    if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true);
+                    switch (self.input[self.cursor]) {
+                        '.' => {
+                            self.cursor += 1;
+                            self.state = .number_post_dot;
+                            continue :state_loop;
+                        },
+                        'e', 'E' => {
+                            self.cursor += 1;
+                            self.state = .number_post_e;
+                            continue :state_loop;
+                        },
+                        else => {
+                            self.state = .post_value;
+                            return Token{ .number = self.takeValueSlice() };
+                        },
+                    }
+                },
+                .number_int => {
+                    while (self.cursor < self.input.len) : (self.cursor += 1) {
+                        switch (self.input[self.cursor]) {
+                            '0'...'9' => continue,
+                            '.' => {
+                                self.cursor += 1;
+                                self.state = .number_post_dot;
+                                continue :state_loop;
+                            },
+                            'e', 'E' => {
+                                self.cursor += 1;
+                                self.state = .number_post_e;
+                                continue :state_loop;
+                            },
+                            else => {
+                                self.state = .post_value;
+                                return Token{ .number = self.takeValueSlice() };
+                            },
+                        }
+                    }
+                    return self.endOfBufferInNumber(true);
+                },
+                .number_post_dot => {
+                    if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+                    switch (try self.expectByte()) {
+                        '0'...'9' => {
+                            self.cursor += 1;
+                            self.state = .number_frac;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .number_frac => {
+                    while (self.cursor < self.input.len) : (self.cursor += 1) {
+                        switch (self.input[self.cursor]) {
+                            '0'...'9' => continue,
+                            'e', 'E' => {
+                                self.cursor += 1;
+                                self.state = .number_post_e;
+                                continue :state_loop;
+                            },
+                            else => {
+                                self.state = .post_value;
+                                return Token{ .number = self.takeValueSlice() };
+                            },
+                        }
+                    }
+                    return self.endOfBufferInNumber(true);
+                },
+                .number_post_e => {
+                    if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+                    switch (self.input[self.cursor]) {
+                        '0'...'9' => {
+                            self.cursor += 1;
+                            self.state = .number_exp;
+                            continue :state_loop;
+                        },
+                        '+', '-' => {
+                            self.cursor += 1;
+                            self.state = .number_post_e_sign;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .number_post_e_sign => {
+                    if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
+                    switch (self.input[self.cursor]) {
+                        '0'...'9' => {
+                            self.cursor += 1;
+                            self.state = .number_exp;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .number_exp => {
+                    while (self.cursor < self.input.len) : (self.cursor += 1) {
+                        switch (self.input[self.cursor]) {
+                            '0'...'9' => continue,
+                            else => {
+                                self.state = .post_value;
+                                return Token{ .number = self.takeValueSlice() };
+                            },
+                        }
+                    }
+                    return self.endOfBufferInNumber(true);
+                },
+
+                .string => {
+                    while (self.cursor < self.input.len) : (self.cursor += 1) {
+                        switch (self.input[self.cursor]) {
+                            0...0x1f => return error.SyntaxError, // Bare ASCII control code in string.
+
+                            // ASCII plain text.
+                            0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue,
+
+                            // Special characters.
+                            '"' => {
+                                const result = Token{ .string = self.takeValueSlice() };
+                                self.cursor += 1;
+                                self.state = .post_value;
+                                return result;
+                            },
+                            '\\' => {
+                                const slice = self.takeValueSlice();
+                                self.cursor += 1;
+                                self.state = .string_backslash;
+                                if (slice.len > 0) return Token{ .partial_string = slice };
+                                continue :state_loop;
+                            },
+
+                            // UTF-8 validation.
+                            // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
+                            0xC2...0xDF => {
+                                self.cursor += 1;
+                                self.state = .string_utf8_last_byte;
+                                continue :state_loop;
+                            },
+                            0xE0 => {
+                                self.cursor += 1;
+                                self.state = .string_utf8_second_to_last_byte_guard_against_overlong;
+                                continue :state_loop;
+                            },
+                            0xE1...0xEC, 0xEE...0xEF => {
+                                self.cursor += 1;
+                                self.state = .string_utf8_second_to_last_byte;
+                                continue :state_loop;
+                            },
+                            0xED => {
+                                self.cursor += 1;
+                                self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half;
+                                continue :state_loop;
+                            },
+                            0xF0 => {
+                                self.cursor += 1;
+                                self.state = .string_utf8_third_to_last_byte_guard_against_overlong;
+                                continue :state_loop;
+                            },
+                            0xF1...0xF3 => {
+                                self.cursor += 1;
+                                self.state = .string_utf8_third_to_last_byte;
+                                continue :state_loop;
+                            },
+                            0xF4 => {
+                                self.cursor += 1;
+                                self.state = .string_utf8_third_to_last_byte_guard_against_too_large;
+                                continue :state_loop;
+                            },
+                            0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8.
+                        }
+                    }
+                    if (self.is_end_of_input) return error.UnexpectedEndOfInput;
+                    const slice = self.takeValueSlice();
+                    if (slice.len > 0) return Token{ .partial_string = slice };
+                    return error.BufferUnderrun;
+                },
+                .string_backslash => {
+                    switch (try self.expectByte()) {
+                        '"', '\\', '/' => {
+                            // Since these characters now represent themselves literally,
+                            // we can simply begin the next plaintext slice here.
+                            self.value_start = self.cursor;
+                            self.cursor += 1;
+                            self.state = .string;
+                            continue :state_loop;
+                        },
+                        'b' => {
+                            self.cursor += 1;
+                            self.value_start = self.cursor;
+                            self.state = .string;
+                            return Token{ .partial_string_escaped_1 = [_]u8{0x08} };
+                        },
+                        'f' => {
+                            self.cursor += 1;
+                            self.value_start = self.cursor;
+                            self.state = .string;
+                            return Token{ .partial_string_escaped_1 = [_]u8{0x0c} };
+                        },
+                        'n' => {
+                            self.cursor += 1;
+                            self.value_start = self.cursor;
+                            self.state = .string;
+                            return Token{ .partial_string_escaped_1 = [_]u8{'\n'} };
+                        },
+                        'r' => {
+                            self.cursor += 1;
+                            self.value_start = self.cursor;
+                            self.state = .string;
+                            return Token{ .partial_string_escaped_1 = [_]u8{'\r'} };
+                        },
+                        't' => {
+                            self.cursor += 1;
+                            self.value_start = self.cursor;
+                            self.state = .string;
+                            return Token{ .partial_string_escaped_1 = [_]u8{'\t'} };
+                        },
+                        'u' => {
+                            self.cursor += 1;
+                            self.state = .string_backslash_u;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .string_backslash_u => {
+                    const c = try self.expectByte();
+                    switch (c) {
+                        '0'...'9' => {
+                            self.unicode_code_point = @as(u21, c - '0') << 12;
+                        },
+                        'A'...'F' => {
+                            self.unicode_code_point = @as(u21, c - 'A' + 10) << 12;
+                        },
+                        'a'...'f' => {
+                            self.unicode_code_point = @as(u21, c - 'a' + 10) << 12;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                    self.cursor += 1;
+                    self.state = .string_backslash_u_1;
+                    continue :state_loop;
+                },
+                .string_backslash_u_1 => {
+                    const c = try self.expectByte();
+                    switch (c) {
+                        '0'...'9' => {
+                            self.unicode_code_point |= @as(u21, c - '0') << 8;
+                        },
+                        'A'...'F' => {
+                            self.unicode_code_point |= @as(u21, c - 'A' + 10) << 8;
+                        },
+                        'a'...'f' => {
+                            self.unicode_code_point |= @as(u21, c - 'a' + 10) << 8;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                    self.cursor += 1;
+                    self.state = .string_backslash_u_2;
+                    continue :state_loop;
+                },
+                .string_backslash_u_2 => {
+                    const c = try self.expectByte();
+                    switch (c) {
+                        '0'...'9' => {
+                            self.unicode_code_point |= @as(u21, c - '0') << 4;
+                        },
+                        'A'...'F' => {
+                            self.unicode_code_point |= @as(u21, c - 'A' + 10) << 4;
+                        },
+                        'a'...'f' => {
+                            self.unicode_code_point |= @as(u21, c - 'a' + 10) << 4;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                    self.cursor += 1;
+                    self.state = .string_backslash_u_3;
+                    continue :state_loop;
+                },
+                .string_backslash_u_3 => {
+                    const c = try self.expectByte();
+                    switch (c) {
+                        '0'...'9' => {
+                            self.unicode_code_point |= c - '0';
+                        },
+                        'A'...'F' => {
+                            self.unicode_code_point |= c - 'A' + 10;
+                        },
+                        'a'...'f' => {
+                            self.unicode_code_point |= c - 'a' + 10;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                    self.cursor += 1;
+                    switch (self.unicode_code_point) {
+                        0xD800...0xDBFF => {
+                            // High surrogate half.
+                            self.unicode_code_point = 0x10000 | (self.unicode_code_point << 10);
+                            self.state = .string_surrogate_half;
+                            continue :state_loop;
+                        },
+                        0xDC00...0xDFFF => return error.SyntaxError, // Unexpected low surrogate half.
+                        else => {
+                            // Code point from a single UTF-16 code unit.
+                            self.value_start = self.cursor;
+                            self.state = .string;
+                            return self.partialStringCodepoint();
+                        },
+                    }
+                },
+                .string_surrogate_half => {
+                    switch (try self.expectByte()) {
+                        '\\' => {
+                            self.cursor += 1;
+                            self.state = .string_surrogate_half_backslash;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Expected low surrogate half.
+                    }
+                },
+                .string_surrogate_half_backslash => {
+                    switch (try self.expectByte()) {
+                        'u' => {
+                            self.cursor += 1;
+                            self.state = .string_surrogate_half_backslash_u;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Expected low surrogate half.
+                    }
+                },
+                .string_surrogate_half_backslash_u => {
+                    switch (try self.expectByte()) {
+                        'D', 'd' => {
+                            self.cursor += 1;
+                            self.state = .string_surrogate_half_backslash_u_1;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Expected low surrogate half.
+                    }
+                },
+                .string_surrogate_half_backslash_u_1 => {
+                    const c = try self.expectByte();
+                    switch (c) {
+                        'C'...'F' => {
+                            self.cursor += 1;
+                            self.unicode_code_point |= @as(u21, c - 'C') << 8;
+                            self.state = .string_surrogate_half_backslash_u_2;
+                            continue :state_loop;
+                        },
+                        'c'...'f' => {
+                            self.cursor += 1;
+                            self.unicode_code_point |= @as(u21, c - 'c') << 8;
+                            self.state = .string_surrogate_half_backslash_u_2;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Expected low surrogate half.
+                    }
+                },
+                .string_surrogate_half_backslash_u_2 => {
+                    const c = try self.expectByte();
+                    switch (c) {
+                        '0'...'9' => {
+                            self.cursor += 1;
+                            self.unicode_code_point |= @as(u21, c - '0') << 4;
+                            self.state = .string_surrogate_half_backslash_u_3;
+                            continue :state_loop;
+                        },
+                        'A'...'F' => {
+                            self.cursor += 1;
+                            self.unicode_code_point |= @as(u21, c - 'A' + 10) << 4;
+                            self.state = .string_surrogate_half_backslash_u_3;
+                            continue :state_loop;
+                        },
+                        'a'...'f' => {
+                            self.cursor += 1;
+                            self.unicode_code_point |= @as(u21, c - 'a' + 10) << 4;
+                            self.state = .string_surrogate_half_backslash_u_3;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .string_surrogate_half_backslash_u_3 => {
+                    const c = try self.expectByte();
+                    switch (c) {
+                        '0'...'9' => {
+                            self.unicode_code_point |= c - '0';
+                        },
+                        'A'...'F' => {
+                            self.unicode_code_point |= c - 'A' + 10;
+                        },
+                        'a'...'f' => {
+                            self.unicode_code_point |= c - 'a' + 10;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                    self.cursor += 1;
+                    self.value_start = self.cursor;
+                    self.state = .string;
+                    return self.partialStringCodepoint();
+                },
+
+                .string_utf8_last_byte => {
+                    switch (try self.expectByte()) {
+                        0x80...0xBF => {
+                            self.cursor += 1;
+                            self.state = .string;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Invalid UTF-8.
+                    }
+                },
+                .string_utf8_second_to_last_byte => {
+                    switch (try self.expectByte()) {
+                        0x80...0xBF => {
+                            self.cursor += 1;
+                            self.state = .string_utf8_last_byte;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Invalid UTF-8.
+                    }
+                },
+                .string_utf8_second_to_last_byte_guard_against_overlong => {
+                    switch (try self.expectByte()) {
+                        0xA0...0xBF => {
+                            self.cursor += 1;
+                            self.state = .string_utf8_last_byte;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Invalid UTF-8.
+                    }
+                },
+                .string_utf8_second_to_last_byte_guard_against_surrogate_half => {
+                    switch (try self.expectByte()) {
+                        0x80...0x9F => {
+                            self.cursor += 1;
+                            self.state = .string_utf8_last_byte;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Invalid UTF-8.
+                    }
+                },
+                .string_utf8_third_to_last_byte => {
+                    switch (try self.expectByte()) {
+                        0x80...0xBF => {
+                            self.cursor += 1;
+                            self.state = .string_utf8_second_to_last_byte;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Invalid UTF-8.
+                    }
+                },
+                .string_utf8_third_to_last_byte_guard_against_overlong => {
+                    switch (try self.expectByte()) {
+                        0x90...0xBF => {
+                            self.cursor += 1;
+                            self.state = .string_utf8_second_to_last_byte;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Invalid UTF-8.
+                    }
+                },
+                .string_utf8_third_to_last_byte_guard_against_too_large => {
+                    switch (try self.expectByte()) {
+                        0x80...0x8F => {
+                            self.cursor += 1;
+                            self.state = .string_utf8_second_to_last_byte;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError, // Invalid UTF-8.
+                    }
+                },
+
+                .literal_t => {
+                    switch (try self.expectByte()) {
+                        'r' => {
+                            self.cursor += 1;
+                            self.state = .literal_tr;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .literal_tr => {
+                    switch (try self.expectByte()) {
+                        'u' => {
+                            self.cursor += 1;
+                            self.state = .literal_tru;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .literal_tru => {
+                    switch (try self.expectByte()) {
+                        'e' => {
+                            self.cursor += 1;
+                            self.state = .post_value;
+                            return .true;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .literal_f => {
+                    switch (try self.expectByte()) {
+                        'a' => {
+                            self.cursor += 1;
+                            self.state = .literal_fa;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .literal_fa => {
+                    switch (try self.expectByte()) {
+                        'l' => {
+                            self.cursor += 1;
+                            self.state = .literal_fal;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .literal_fal => {
+                    switch (try self.expectByte()) {
+                        's' => {
+                            self.cursor += 1;
+                            self.state = .literal_fals;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .literal_fals => {
+                    switch (try self.expectByte()) {
+                        'e' => {
+                            self.cursor += 1;
+                            self.state = .post_value;
+                            return .false;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .literal_n => {
+                    switch (try self.expectByte()) {
+                        'u' => {
+                            self.cursor += 1;
+                            self.state = .literal_nu;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .literal_nu => {
+                    switch (try self.expectByte()) {
+                        'l' => {
+                            self.cursor += 1;
+                            self.state = .literal_nul;
+                            continue :state_loop;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+                .literal_nul => {
+                    switch (try self.expectByte()) {
+                        'l' => {
+                            self.cursor += 1;
+                            self.state = .post_value;
+                            return .null;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+            }
+            unreachable;
+        }
+    }
+
+    /// Reports the type of the token that the next `next*()` call will produce,
+    /// scanning forward only as far as that token's first byte (or the end of the input).
+    /// The only input consumed along the way is commas, colons, and whitespace between tokens,
+    /// so calling this repeatedly is idempotent.
+    pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
+        while (true) {
+            switch (self.state) {
+                // Already partway through a token: the state alone fixes the token type.
+                .number_minus,
+                .number_leading_zero,
+                .number_int,
+                .number_post_dot,
+                .number_frac,
+                .number_post_e,
+                .number_post_e_sign,
+                .number_exp,
+                => return .number,
+
+                .string,
+                .string_backslash,
+                .string_backslash_u,
+                .string_backslash_u_1,
+                .string_backslash_u_2,
+                .string_backslash_u_3,
+                .string_surrogate_half,
+                .string_surrogate_half_backslash,
+                .string_surrogate_half_backslash_u,
+                .string_surrogate_half_backslash_u_1,
+                .string_surrogate_half_backslash_u_2,
+                .string_surrogate_half_backslash_u_3,
+                .string_utf8_last_byte,
+                .string_utf8_second_to_last_byte,
+                .string_utf8_second_to_last_byte_guard_against_overlong,
+                .string_utf8_second_to_last_byte_guard_against_surrogate_half,
+                .string_utf8_third_to_last_byte,
+                .string_utf8_third_to_last_byte_guard_against_overlong,
+                .string_utf8_third_to_last_byte_guard_against_too_large,
+                => return .string,
+
+                .literal_t, .literal_tr, .literal_tru => return .true,
+                .literal_f, .literal_fa, .literal_fal, .literal_fals => return .false,
+                .literal_n, .literal_nu, .literal_nul => return .null,
+
+                // Between tokens: the next non-whitespace byte decides.
+                .value => switch (try self.skipWhitespaceExpectByte()) {
+                    '{' => return .object_begin,
+                    '[' => return .array_begin,
+                    '"' => return .string,
+                    '-', '0'...'9' => return .number,
+                    't' => return .true,
+                    'f' => return .false,
+                    'n' => return .null,
+                    else => return error.SyntaxError,
+                },
+
+                .object_start => switch (try self.skipWhitespaceExpectByte()) {
+                    '"' => return .string,
+                    '}' => return .object_end,
+                    else => return error.SyntaxError,
+                },
+
+                .object_post_comma => switch (try self.skipWhitespaceExpectByte()) {
+                    '"' => return .string,
+                    else => return error.SyntaxError,
+                },
+
+                .array_start => switch (try self.skipWhitespaceExpectByte()) {
+                    ']' => return .array_end,
+                    else => {
+                        // Anything else must be the first element; classify it as a value.
+                        self.state = .value;
+                        continue;
+                    },
+                },
+
+                .post_value => {
+                    const at_end = try self.skipWhitespaceCheckEnd();
+                    if (at_end) return .end_of_document;
+
+                    const byte = self.input[self.cursor];
+
+                    // The value just finished was an object key, so only ':' may follow.
+                    if (self.string_is_object_key) {
+                        self.string_is_object_key = false;
+                        if (byte != ':') return error.SyntaxError;
+                        self.cursor += 1;
+                        self.state = .value;
+                        continue;
+                    }
+
+                    switch (byte) {
+                        '}' => return .object_end,
+                        ']' => return .array_end,
+                        ',' => {
+                            // What may follow the comma depends on the enclosing container.
+                            self.state = switch (self.stack.peek()) {
+                                OBJECT_MODE => .object_post_comma,
+                                ARRAY_MODE => .value,
+                            };
+                            self.cursor += 1;
+                            continue;
+                        },
+                        else => return error.SyntaxError,
+                    }
+                },
+            }
+            unreachable;
+        }
+    }
+
+    // The scanner's position within the JSON grammar.
+    // `peekNextTokenType` switches on this to decide what the upcoming token is.
+    const State = enum {
+        // Between tokens: the first byte of a value is expected next.
+        value,
+        // A value has just ended: expects ':' when that value was an object key,
+        // otherwise ',', '}', ']', or the end of the document.
+        post_value,
+
+        // Expects either '"' (an object key) or '}'.
+        object_start,
+        // Expects '"' (an object key).
+        object_post_comma,
+
+        // Expects either ']' or the first byte of a value.
+        array_start,
+
+        // Partway through a number; `peekNextTokenType` reports `.number` for all of these.
+        number_minus,
+        number_leading_zero,
+        number_int,
+        number_post_dot,
+        number_frac,
+        number_post_e,
+        number_post_e_sign,
+        number_exp,
+
+        // Partway through a string; `peekNextTokenType` reports `.string` for all of these.
+        string,
+        string_backslash,
+        string_backslash_u,
+        string_backslash_u_1,
+        string_backslash_u_2,
+        string_backslash_u_3,
+        string_surrogate_half,
+        string_surrogate_half_backslash,
+        string_surrogate_half_backslash_u,
+        string_surrogate_half_backslash_u_1,
+        string_surrogate_half_backslash_u_2,
+        string_surrogate_half_backslash_u_3,
+
+        // Partway through a multi-byte UTF-8 sequence inside a string (also reported as `.string`).
+        // From http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
+        string_utf8_last_byte, // State A
+        string_utf8_second_to_last_byte, // State B
+        string_utf8_second_to_last_byte_guard_against_overlong, // State C
+        string_utf8_second_to_last_byte_guard_against_surrogate_half, // State D
+        string_utf8_third_to_last_byte, // State E
+        string_utf8_third_to_last_byte_guard_against_overlong, // State F
+        string_utf8_third_to_last_byte_guard_against_too_large, // State G
+
+        // Partway through `true`, `false`, or `null`.
+        // The suffix names the letters consumed so far (e.g. `literal_tr` expects 'u' next).
+        literal_t,
+        literal_tr,
+        literal_tru,
+        literal_f,
+        literal_fa,
+        literal_fal,
+        literal_fals,
+        literal_n,
+        literal_nu,
+        literal_nul,
+    };
+
+    /// Returns the byte under the cursor without consuming it.
+    /// When the buffer is exhausted, fails with `error.UnexpectedEndOfInput` if no more input
+    /// is coming (`is_end_of_input`), or with `error.BufferUnderrun` otherwise.
+    fn expectByte(self: *const @This()) !u8 {
+        const exhausted = self.cursor >= self.input.len;
+        if (!exhausted) return self.input[self.cursor];
+
+        if (!self.is_end_of_input) return error.BufferUnderrun;
+        return error.UnexpectedEndOfInput;
+    }
+
+    /// Advances the cursor past any run of spaces, tabs, carriage returns, and newlines,
+    /// stopping at the first other byte or at the end of the buffer.
+    /// Each newline is recorded in `diagnostics` when diagnostics are attached.
+    fn skipWhitespace(self: *@This()) void {
+        while (self.cursor < self.input.len) {
+            switch (self.input[self.cursor]) {
+                ' ', '\t', '\r' => {},
+                '\n' => if (self.diagnostics) |diag| {
+                    diag.line_number += 1;
+                    // The line start is recorded at the newline itself, so subtracting it
+                    // from a later cursor position directly yields a 1-based column number.
+                    diag.line_start_cursor = self.cursor;
+                },
+                else => break,
+            }
+            self.cursor += 1;
+        }
+    }
+
+    /// Skips inter-token whitespace, then returns the next byte without consuming it.
+    /// Fails the same way `expectByte` does when the buffer runs out.
+    fn skipWhitespaceExpectByte(self: *@This()) !u8 {
+        self.skipWhitespace();
+        const byte = try self.expectByte();
+        return byte;
+    }
+
+    /// Skips inter-token whitespace and reports whether the document has ended.
+    /// Returns `true` when the input is finished and no container is left open,
+    /// and `false` when another byte is available inside an open container.
+    /// Errors:
+    /// * `error.SyntaxError`: more bytes follow, but the stack height is already 0.
+    /// * `error.UnexpectedEndOfInput`: the input is finished while containers are still open.
+    /// * `error.BufferUnderrun`: the buffer is exhausted but more input may still arrive.
+    fn skipWhitespaceCheckEnd(self: *@This()) !bool {
+        self.skipWhitespace();
+
+        if (self.cursor < self.input.len) {
+            // There is a byte to look at; it is only legal inside a container.
+            if (self.stackHeight() == 0) return error.SyntaxError;
+            return false;
+        }
+
+        // The buffer is exhausted.
+        if (!self.is_end_of_input) return error.BufferUnderrun;
+        if (self.stackHeight() != 0) return error.UnexpectedEndOfInput;
+        return true;
+    }
+
+    /// Returns the input bytes from `value_start` up to the cursor,
+    /// then moves `value_start` to the cursor so the next call starts where this one ended.
+    fn takeValueSlice(self: *@This()) []const u8 {
+        // The returned slice is computed first; the deferred update runs afterwards.
+        defer self.value_start = self.cursor;
+        return self.input[self.value_start..self.cursor];
+    }
+
+    /// Handles running out of buffer while scanning a number.
+    /// `allow_end` says whether the number may legally end at this point.
+    /// * More input may arrive: returns the bytes seen so far as `.partial_number`,
+    ///   or `error.BufferUnderrun` when there are none.
+    /// * The input is finished: returns the bytes as a complete `.number` and moves to
+    ///   `.post_value`, or `error.UnexpectedEndOfInput` when `allow_end` is false.
+    fn endOfBufferInNumber(self: *@This(), allow_end: bool) !Token {
+        const digits = self.takeValueSlice();
+
+        if (!self.is_end_of_input) {
+            if (digits.len == 0) return error.BufferUnderrun;
+            return Token{ .partial_number = digits };
+        }
+
+        if (!allow_end) return error.UnexpectedEndOfInput;
+        self.state = .post_value;
+        return Token{ .number = digits };
+    }
+
+    /// Encodes the pending `unicode_code_point` as UTF-8 and returns it in the
+    /// `partial_string_escaped_N` token whose `N` matches the encoded length (1 to 4 bytes).
+    /// The stored code point is reset to `undefined` once it has been read.
+    fn partialStringCodepoint(self: *@This()) Token {
+        const scalar = self.unicode_code_point;
+        self.unicode_code_point = undefined;
+
+        var utf8: [4]u8 = undefined;
+        const encoded_len = std.unicode.utf8Encode(scalar, &utf8) catch unreachable;
+        return switch (encoded_len) {
+            1 => Token{ .partial_string_escaped_1 = utf8[0..1].* },
+            2 => Token{ .partial_string_escaped_2 = utf8[0..2].* },
+            3 => Token{ .partial_string_escaped_3 = utf8[0..3].* },
+            4 => Token{ .partial_string_escaped_4 = utf8[0..4].* },
+            else => unreachable,
+        };
+    }
+};
+
+// Bit values kept on the scanner's `BitStack`. `peekNextTokenType` compares `stack.peek()`
+// against these to tell whether the container on top of the stack is an object or an array.
+const OBJECT_MODE = 0;
+const ARRAY_MODE = 1;
+
+/// A growable stack of single bits, packed eight to a byte.
+/// Bit `i` is stored in `bytes.items[i >> 3]` at bit position `i & 7`.
+const BitStack = struct {
+    /// Backing storage for the packed bits.
+    bytes: std.ArrayList(u8),
+    /// Number of bits currently on the stack.
+    bit_len: u32 = 0,
+
+    /// Creates an empty stack that allocates from `allocator`.
+    pub fn init(allocator: Allocator) @This() {
+        return .{
+            .bytes = std.ArrayList(u8).init(allocator),
+        };
+    }
+
+    /// Frees the backing storage and leaves the stack undefined.
+    pub fn deinit(self: *@This()) void {
+        self.bytes.deinit();
+        self.* = undefined;
+    }
+
+    /// Reserves enough bytes to hold at least `bit_capacity` bits.
+    pub fn ensureTotalCapacity(self: *@This(), bit_capacity: u32) Allocator.Error!void {
+        // Round up to whole bytes without forming `bit_capacity + 7`, which would
+        // overflow `u32` for capacities within 7 of its maximum.
+        const whole_bytes = bit_capacity >> 3;
+        const byte_capacity = if ((bit_capacity & 7) != 0) whole_bytes + 1 else whole_bytes;
+        try self.bytes.ensureTotalCapacity(byte_capacity);
+    }
+
+    /// Pushes `b` onto the top of the stack, growing the storage by one byte when needed.
+    pub fn push(self: *@This(), b: u1) Allocator.Error!void {
+        const byte_index = self.bit_len >> 3;
+        const bit_index = @intCast(u3, self.bit_len & 7);
+
+        if (self.bytes.items.len <= byte_index) {
+            try self.bytes.append(0);
+        }
+
+        // Clear the slot first: a byte reused after `pop` may still hold an old bit.
+        self.bytes.items[byte_index] &= ~(@as(u8, 1) << bit_index);
+        self.bytes.items[byte_index] |= @as(u8, b) << bit_index;
+
+        self.bit_len += 1;
+    }
+
+    /// Returns the top bit without removing it.
+    /// The stack must not be empty: `bit_len - 1` would underflow.
+    pub fn peek(self: *const @This()) u1 {
+        const byte_index = (self.bit_len - 1) >> 3;
+        const bit_index = @intCast(u3, (self.bit_len - 1) & 7);
+        return @intCast(u1, (self.bytes.items[byte_index] >> bit_index) & 1);
+    }
+
+    /// Removes and returns the top bit. The stack must not be empty.
+    /// The backing storage is not shrunk.
+    pub fn pop(self: *@This()) u1 {
+        const b = self.peek();
+        self.bit_len -= 1;
+        return b;
+    }
+};
+
+/// Appends `buf` to `list`, refusing to let the list grow beyond `max_value_len` bytes.
+/// Returns `error.ValueTooLong` (leaving `list` untouched) when the combined length would
+/// exceed the limit; allocation failures from the list are passed through.
+fn appendSlice(list: *std.ArrayList(u8), buf: []const u8, max_value_len: usize) !void {
+    // Compare against the remaining room rather than the sum, so nothing can overflow.
+    if (list.items.len > max_value_len) return error.ValueTooLong;
+    const room = max_value_len - list.items.len;
+    if (buf.len > room) return error.ValueTooLong;
+    return list.appendSlice(buf);
+}
+
+/// Given the slice from a `Token.number` or `Token.allocated_number`,
+/// returns true when the text has no fraction or exponent part (no `.`, `e`, or `E`).
+/// This is only a hint for choosing between integer and float parsing:
+/// a value for which this returns false, such as `1.0`, may still be numerically an integer.
+/// The result is not meaningful for non-numeric input.
+pub fn isNumberFormattedLikeAnInteger(value: []const u8) bool {
+    for (value) |byte| {
+        switch (byte) {
+            '.', 'e', 'E' => return false,
+            else => {},
+        }
+    }
+    return true;
+}
+
+// Reference the companion test file so that its tests are included when this file is tested.
+test {
+    _ = @import("./scanner_test.zig");
+}
author	Loris Cro <kappaloris@gmail.com>	2023-06-18 09:06:40 +0200
committer	GitHub <noreply@github.com>	2023-06-18 09:06:40 +0200
commit	216ef10dc471e4db60a30208be178d6c59efeaaf (patch)
tree	8c239dab283ae9cb3b7fe099bae240bcc53f894e /lib/std/json/scanner.zig
parent	0fc1d396495c1ab482197021dedac8bea3f9401c (diff)
parent	729a051e9e38674233190aea23c0ac8c134f2d67 (diff)
download	zig-216ef10dc471e4db60a30208be178d6c59efeaaf.tar.gz zig-216ef10dc471e4db60a30208be178d6c59efeaaf.zip