diff options
| author | Josh Wolfe <thejoshwolfe@gmail.com> | 2023-05-13 14:31:53 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-13 14:31:53 -0400 |
| commit | 018b743c7a83c2af5e5b6ba9aae1a4703e306f71 (patch) | |
| tree | 3e113cc28cc3dcaace4917980c2813b1a6de2654 /lib/std/json.zig | |
| parent | c7bf8bab38f8b89c1371eedb9229e00a29b5ca5b (diff) | |
| download | zig-018b743c7a83c2af5e5b6ba9aae1a4703e306f71.tar.gz zig-018b743c7a83c2af5e5b6ba9aae1a4703e306f71.zip | |
std: Rewrite low-level json api to support streaming (#15602)
Diffstat (limited to 'lib/std/json.zig')
| -rw-r--r-- | lib/std/json.zig | 2859 |
1 files changed, 50 insertions, 2809 deletions
diff --git a/lib/std/json.zig b/lib/std/json.zig index 011463faef..10449cdace 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -1,2818 +1,59 @@ -// JSON parser conforming to RFC8259. -// -// https://tools.ietf.org/html/rfc8259 - -const builtin = @import("builtin"); -const std = @import("std.zig"); -const debug = std.debug; -const assert = debug.assert; -const testing = std.testing; -const mem = std.mem; -const maxInt = std.math.maxInt; +//! JSON parsing and stringification conforming to RFC 8259. https://datatracker.ietf.org/doc/html/rfc8259 +//! +//! The low-level `Scanner` API reads from an input slice or successive slices of inputs, +//! The `Reader` API connects a `std.io.Reader` to a `Scanner`. +//! +//! The high-level `parseFromSlice` and `parseFromTokenSource` deserializes a JSON document into a Zig type. +//! The high-level `Parser` parses any JSON document into a dynamically typed `ValueTree` that has its own memory arena. +//! +//! The low-level `writeStream` emits syntax-conformant JSON tokens to a `std.io.Writer`. +//! The high-level `stringify` serializes a Zig type into JSON. + +pub const ValueTree = @import("json/dynamic.zig").ValueTree; +pub const ObjectMap = @import("json/dynamic.zig").ObjectMap; +pub const Array = @import("json/dynamic.zig").Array; +pub const Value = @import("json/dynamic.zig").Value; +pub const Parser = @import("json/dynamic.zig").Parser; + +pub const validate = @import("json/scanner.zig").validate; +pub const Error = @import("json/scanner.zig").Error; +pub const reader = @import("json/scanner.zig").reader; +pub const default_buffer_size = @import("json/scanner.zig").default_buffer_size; +pub const Token = @import("json/scanner.zig").Token; +pub const TokenType = @import("json/scanner.zig").TokenType; +pub const Diagnostics = @import("json/scanner.zig").Diagnostics; +pub const AllocWhen = @import("json/scanner.zig").AllocWhen; +pub const default_max_value_len = @import("json/scanner.zig").default_max_value_len; +pub const Reader = @import("json/scanner.zig").Reader; +pub const Scanner = @import("json/scanner.zig").Scanner; +pub const isNumberFormattedLikeAnInteger = @import("json/scanner.zig").isNumberFormattedLikeAnInteger; + +pub const ParseOptions = @import("json/static.zig").ParseOptions; +pub const parseFromSlice = @import("json/static.zig").parseFromSlice; +pub const parseFromTokenSource = @import("json/static.zig").parseFromTokenSource; +pub const ParseError = @import("json/static.zig").ParseError; +pub const parseFree = @import("json/static.zig").parseFree; + +pub const StringifyOptions = @import("json/stringify.zig").StringifyOptions; +pub const encodeJsonString = @import("json/stringify.zig").encodeJsonString; +pub const encodeJsonStringChars = @import("json/stringify.zig").encodeJsonStringChars; +pub const stringify = @import("json/stringify.zig").stringify; +pub const stringifyAlloc = @import("json/stringify.zig").stringifyAlloc; pub const WriteStream = @import("json/write_stream.zig").WriteStream; pub const writeStream = @import("json/write_stream.zig").writeStream; -const StringEscapes = union(enum) { - None, - - Some: struct { - size_diff: isize, - }, -}; - -/// Checks to see if a string matches what it would be as a json-encoded string -/// Assumes that `encoded` is a well-formed json string -fn encodesTo(decoded: []const u8, encoded: []const u8) bool { - var i: usize = 0; - var j: usize = 0; - while (i < decoded.len) { - if (j >= encoded.len) return false; - if (encoded[j] != '\\') { - if (decoded[i] != encoded[j]) return false; - j += 1; - i += 1; - } else { - const escape_type = encoded[j + 1]; - if (escape_type != 'u') { - const t: u8 = switch (escape_type) { - '\\' => '\\', - '/' => '/', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'f' => 12, - 'b' => 8, - '"' => '"', - else => unreachable, - }; - if (decoded[i] != t) return false; - j += 2; - i += 1; - } else { - var codepoint = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable; - j += 6; - if (codepoint >= 0xD800 and codepoint < 0xDC00) { - // surrogate pair - assert(encoded[j] == '\\'); - assert(encoded[j + 1] == 'u'); - const low_surrogate = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable; - codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low_surrogate & 0x03ff)); - j += 6; - } - var buf: [4]u8 = undefined; - const len = std.unicode.utf8Encode(codepoint, &buf) catch unreachable; - if (i + len > decoded.len) return false; - if (!mem.eql(u8, decoded[i..][0..len], buf[0..len])) return false; - i += len; - } - } - } - assert(i == decoded.len); - assert(j == encoded.len); - return true; -} - -/// A single token slice into the parent string. -/// -/// Use `token.slice()` on the input at the current position to get the current slice. -pub const Token = union(enum) { - ObjectBegin, - ObjectEnd, - ArrayBegin, - ArrayEnd, - String: struct { - /// How many bytes the token is. - count: usize, - - /// Whether string contains an escape sequence and cannot be zero-copied - escapes: StringEscapes, - - pub fn decodedLength(self: @This()) usize { - return self.count +% switch (self.escapes) { - .None => 0, - .Some => |s| @bitCast(usize, s.size_diff), - }; - } - - /// Slice into the underlying input string. - pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 { - return input[i - self.count .. i]; - } - }, - Number: struct { - /// How many bytes the token is. - count: usize, - - /// Whether number is simple and can be represented by an integer (i.e. no `.` or `e`) - is_integer: bool, - - /// Slice into the underlying input string. - pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 { - return input[i - self.count .. i]; - } - }, - True, - False, - Null, -}; - -const AggregateContainerType = enum(u1) { object, array }; - -// A LIFO bit-stack. Tracks which container-types have been entered during parse. -fn AggregateContainerStack(comptime n: usize) type { - return struct { - const Self = @This(); - - const element_bitcount = 8 * @sizeOf(usize); - const element_count = n / element_bitcount; - const ElementType = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = element_bitcount } }); - const ElementShiftAmountType = std.math.Log2Int(ElementType); - - comptime { - std.debug.assert(n % element_bitcount == 0); - } - - memory: [element_count]ElementType, - len: usize, - - pub fn init(self: *Self) void { - self.memory = [_]ElementType{0} ** element_count; - self.len = 0; - } - - pub fn push(self: *Self, ty: AggregateContainerType) ?void { - if (self.len >= n) { - return null; - } - - const index = self.len / element_bitcount; - const sub_index = @intCast(ElementShiftAmountType, self.len % element_bitcount); - const clear_mask = ~(@as(ElementType, 1) << sub_index); - const set_bits = @as(ElementType, @enumToInt(ty)) << sub_index; - - self.memory[index] &= clear_mask; - self.memory[index] |= set_bits; - self.len += 1; - } - - pub fn peek(self: *Self) ?AggregateContainerType { - if (self.len == 0) { - return null; - } - - const bit_to_extract = self.len - 1; - const index = bit_to_extract / element_bitcount; - const sub_index = @intCast(ElementShiftAmountType, bit_to_extract % element_bitcount); - const bit = @intCast(u1, (self.memory[index] >> sub_index) & 1); - return @intToEnum(AggregateContainerType, bit); - } - - pub fn pop(self: *Self) ?AggregateContainerType { - if (self.peek()) |ty| { - self.len -= 1; - return ty; - } - - return null; - } - }; -} - -/// A small streaming JSON parser. This accepts input one byte at a time and returns tokens as -/// they are encountered. No copies or allocations are performed during parsing and the entire -/// parsing state requires ~40-50 bytes of stack space. -/// -/// Conforms strictly to RFC8259. -/// -/// For a non-byte based wrapper, consider using TokenStream instead. -pub const StreamingParser = struct { - const default_max_nestings = 256; - - // Current state - state: State, - // How many bytes we have counted for the current token - count: usize, - // What state to follow after parsing a string (either property or value string) - after_string_state: State, - // What state to follow after parsing a value (either top-level or value end) - after_value_state: State, - // If we stopped now, would the complete parsed string to now be a valid json string - complete: bool, - // Current token flags to pass through to the next generated, see Token. - string_escapes: StringEscapes, - // When in .String states, was the previous character a high surrogate? - string_last_was_high_surrogate: bool, - // Used inside of StringEscapeHexUnicode* states - string_unicode_codepoint: u21, - // The first byte needs to be stored to validate 3- and 4-byte sequences. - sequence_first_byte: u8 = undefined, - // When in .Number states, is the number a (still) valid integer? - number_is_integer: bool, - // Bit-stack for nested object/map literals (max 256 nestings). - stack: AggregateContainerStack(default_max_nestings), - - pub fn init() StreamingParser { - var p: StreamingParser = undefined; - p.reset(); - return p; - } - - pub fn reset(p: *StreamingParser) void { - p.state = .TopLevelBegin; - p.count = 0; - // Set before ever read in main transition function - p.after_string_state = undefined; - p.after_value_state = .ValueEnd; // handle end of values normally - p.stack.init(); - p.complete = false; - p.string_escapes = undefined; - p.string_last_was_high_surrogate = undefined; - p.string_unicode_codepoint = undefined; - p.number_is_integer = undefined; - } - - pub const State = enum(u8) { - // These must be first with these explicit values as we rely on them for indexing the - // bit-stack directly and avoiding a branch. - ObjectSeparator = 0, - ValueEnd = 1, - - TopLevelBegin, - TopLevelEnd, - - ValueBegin, - ValueBeginNoClosing, - - String, - StringUtf8Byte2Of2, - StringUtf8Byte2Of3, - StringUtf8Byte3Of3, - StringUtf8Byte2Of4, - StringUtf8Byte3Of4, - StringUtf8Byte4Of4, - StringEscapeCharacter, - StringEscapeHexUnicode4, - StringEscapeHexUnicode3, - StringEscapeHexUnicode2, - StringEscapeHexUnicode1, - - Number, - NumberMaybeDotOrExponent, - NumberMaybeDigitOrDotOrExponent, - NumberFractionalRequired, - NumberFractional, - NumberMaybeExponent, - NumberExponent, - NumberExponentDigitsRequired, - NumberExponentDigits, - - TrueLiteral1, - TrueLiteral2, - TrueLiteral3, - - FalseLiteral1, - FalseLiteral2, - FalseLiteral3, - FalseLiteral4, - - NullLiteral1, - NullLiteral2, - NullLiteral3, - - // Given an aggregate container type, return the state which should be entered after - // processing a complete value type. - pub fn fromAggregateContainerType(ty: AggregateContainerType) State { - comptime { - std.debug.assert(@enumToInt(AggregateContainerType.object) == @enumToInt(State.ObjectSeparator)); - std.debug.assert(@enumToInt(AggregateContainerType.array) == @enumToInt(State.ValueEnd)); - } - - return @intToEnum(State, @enumToInt(ty)); - } - }; - - pub const Error = error{ - InvalidTopLevel, - TooManyNestedItems, - TooManyClosingItems, - InvalidValueBegin, - InvalidValueEnd, - UnbalancedBrackets, - UnbalancedBraces, - UnexpectedClosingBracket, - UnexpectedClosingBrace, - InvalidNumber, - InvalidSeparator, - InvalidLiteral, - InvalidEscapeCharacter, - InvalidUnicodeHexSymbol, - InvalidUtf8Byte, - InvalidTopLevelTrailing, - InvalidControlCharacter, - }; - - /// Give another byte to the parser and obtain any new tokens. This may (rarely) return two - /// tokens. token2 is always null if token1 is null. - /// - /// There is currently no error recovery on a bad stream. - pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void { - token1.* = null; - token2.* = null; - p.count += 1; - - // unlikely - if (try p.transition(c, token1)) { - _ = try p.transition(c, token2); - } - } - - // Perform a single transition on the state machine and return any possible token. - fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool { - switch (p.state) { - .TopLevelBegin => switch (c) { - '{' => { - p.stack.push(.object) orelse return error.TooManyNestedItems; - p.state = .ValueBegin; - p.after_string_state = .ObjectSeparator; - - token.* = Token.ObjectBegin; - }, - '[' => { - p.stack.push(.array) orelse return error.TooManyNestedItems; - p.state = .ValueBegin; - p.after_string_state = .ValueEnd; - - token.* = Token.ArrayBegin; - }, - '-' => { - p.number_is_integer = true; - p.state = .Number; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - '0' => { - p.number_is_integer = true; - p.state = .NumberMaybeDotOrExponent; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - '1'...'9' => { - p.number_is_integer = true; - p.state = .NumberMaybeDigitOrDotOrExponent; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - '"' => { - p.state = .String; - p.after_value_state = .TopLevelEnd; - // We don't actually need the following since after_value_state should override. - p.after_string_state = .ValueEnd; - p.string_escapes = .None; - p.string_last_was_high_surrogate = false; - p.count = 0; - }, - 't' => { - p.state = .TrueLiteral1; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - 'f' => { - p.state = .FalseLiteral1; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - 'n' => { - p.state = .NullLiteral1; - p.after_value_state = .TopLevelEnd; - p.count = 0; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidTopLevel; - }, - }, - - .TopLevelEnd => switch (c) { - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidTopLevelTrailing; - }, - }, - - .ValueBegin => switch (c) { - // NOTE: These are shared in ValueEnd as well, think we can reorder states to - // be a bit clearer and avoid this duplication. - '}' => { - const last_type = p.stack.peek() orelse return error.TooManyClosingItems; - - if (last_type != .object) { - return error.UnexpectedClosingBrace; - } - - _ = p.stack.pop(); - p.state = .ValueBegin; - p.after_string_state = State.fromAggregateContainerType(last_type); - - switch (p.stack.len) { - 0 => { - p.complete = true; - p.state = .TopLevelEnd; - }, - else => { - p.state = .ValueEnd; - }, - } - - token.* = Token.ObjectEnd; - }, - ']' => { - const last_type = p.stack.peek() orelse return error.TooManyClosingItems; - - if (last_type != .array) { - return error.UnexpectedClosingBracket; - } - - _ = p.stack.pop(); - p.state = .ValueBegin; - p.after_string_state = State.fromAggregateContainerType(last_type); - - switch (p.stack.len) { - 0 => { - p.complete = true; - p.state = .TopLevelEnd; - }, - else => { - p.state = .ValueEnd; - }, - } - - token.* = Token.ArrayEnd; - }, - '{' => { - p.stack.push(.object) orelse return error.TooManyNestedItems; - - p.state = .ValueBegin; - p.after_string_state = .ObjectSeparator; - - token.* = Token.ObjectBegin; - }, - '[' => { - p.stack.push(.array) orelse return error.TooManyNestedItems; - - p.state = .ValueBegin; - p.after_string_state = .ValueEnd; - - token.* = Token.ArrayBegin; - }, - '-' => { - p.number_is_integer = true; - p.state = .Number; - p.count = 0; - }, - '0' => { - p.number_is_integer = true; - p.state = .NumberMaybeDotOrExponent; - p.count = 0; - }, - '1'...'9' => { - p.number_is_integer = true; - p.state = .NumberMaybeDigitOrDotOrExponent; - p.count = 0; - }, - '"' => { - p.state = .String; - p.string_escapes = .None; - p.string_last_was_high_surrogate = false; - p.count = 0; - }, - 't' => { - p.state = .TrueLiteral1; - p.count = 0; - }, - 'f' => { - p.state = .FalseLiteral1; - p.count = 0; - }, - 'n' => { - p.state = .NullLiteral1; - p.count = 0; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidValueBegin; - }, - }, - - // TODO: A bit of duplication here and in the following state, redo. - .ValueBeginNoClosing => switch (c) { - '{' => { - p.stack.push(.object) orelse return error.TooManyNestedItems; - - p.state = .ValueBegin; - p.after_string_state = .ObjectSeparator; - - token.* = Token.ObjectBegin; - }, - '[' => { - p.stack.push(.array) orelse return error.TooManyNestedItems; - - p.state = .ValueBegin; - p.after_string_state = .ValueEnd; - - token.* = Token.ArrayBegin; - }, - '-' => { - p.number_is_integer = true; - p.state = .Number; - p.count = 0; - }, - '0' => { - p.number_is_integer = true; - p.state = .NumberMaybeDotOrExponent; - p.count = 0; - }, - '1'...'9' => { - p.number_is_integer = true; - p.state = .NumberMaybeDigitOrDotOrExponent; - p.count = 0; - }, - '"' => { - p.state = .String; - p.string_escapes = .None; - p.string_last_was_high_surrogate = false; - p.count = 0; - }, - 't' => { - p.state = .TrueLiteral1; - p.count = 0; - }, - 'f' => { - p.state = .FalseLiteral1; - p.count = 0; - }, - 'n' => { - p.state = .NullLiteral1; - p.count = 0; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidValueBegin; - }, - }, - - .ValueEnd => switch (c) { - ',' => { - const last_type = p.stack.peek() orelse unreachable; - p.after_string_state = State.fromAggregateContainerType(last_type); - p.state = .ValueBeginNoClosing; - }, - ']' => { - const last_type = p.stack.peek() orelse return error.TooManyClosingItems; - - if (last_type != .array) { - return error.UnexpectedClosingBracket; - } - - _ = p.stack.pop(); - p.state = .ValueEnd; - p.after_string_state = State.fromAggregateContainerType(last_type); - - if (p.stack.len == 0) { - p.complete = true; - p.state = .TopLevelEnd; - } - - token.* = Token.ArrayEnd; - }, - '}' => { - const last_type = p.stack.peek() orelse return error.TooManyClosingItems; - - if (last_type != .object) { - return error.UnexpectedClosingBrace; - } - - _ = p.stack.pop(); - p.state = .ValueEnd; - p.after_string_state = State.fromAggregateContainerType(last_type); - - if (p.stack.len == 0) { - p.complete = true; - p.state = .TopLevelEnd; - } - - token.* = Token.ObjectEnd; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidValueEnd; - }, - }, - - .ObjectSeparator => switch (c) { - ':' => { - p.state = .ValueBeginNoClosing; - p.after_string_state = .ValueEnd; - }, - 0x09, 0x0A, 0x0D, 0x20 => { - // whitespace - }, - else => { - return error.InvalidSeparator; - }, - }, - - .String => switch (c) { - 0x00...0x1F => { - return error.InvalidControlCharacter; - }, - '"' => { - p.state = p.after_string_state; - if (p.after_value_state == .TopLevelEnd) { - p.state = .TopLevelEnd; - p.complete = true; - } - - token.* = .{ - .String = .{ - .count = p.count - 1, - .escapes = p.string_escapes, - }, - }; - p.string_escapes = undefined; - p.string_last_was_high_surrogate = undefined; - }, - '\\' => { - p.state = .StringEscapeCharacter; - switch (p.string_escapes) { - .None => { - p.string_escapes = .{ .Some = .{ .size_diff = 0 } }; - }, - .Some => {}, - } - }, - 0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => { - // non-control ascii - p.string_last_was_high_surrogate = false; - }, - 0xC2...0xDF => { - p.state = .StringUtf8Byte2Of2; - }, - 0xE0...0xEF => { - p.state = .StringUtf8Byte2Of3; - p.sequence_first_byte = c; - }, - 0xF0...0xF4 => { - p.state = .StringUtf8Byte2Of4; - p.sequence_first_byte = c; - }, - else => { - return error.InvalidUtf8Byte; - }, - }, - - .StringUtf8Byte2Of2 => switch (c >> 6) { - 0b10 => p.state = .String, - else => return error.InvalidUtf8Byte, - }, - .StringUtf8Byte2Of3 => { - switch (p.sequence_first_byte) { - 0xE0 => switch (c) { - 0xA0...0xBF => {}, - else => return error.InvalidUtf8Byte, - }, - 0xE1...0xEF => switch (c) { - 0x80...0xBF => {}, - else => return error.InvalidUtf8Byte, - }, - else => return error.InvalidUtf8Byte, - } - p.state = .StringUtf8Byte3Of3; - }, - .StringUtf8Byte3Of3 => switch (c) { - 0x80...0xBF => p.state = .String, - else => return error.InvalidUtf8Byte, - }, - .StringUtf8Byte2Of4 => { - switch (p.sequence_first_byte) { - 0xF0 => switch (c) { - 0x90...0xBF => {}, - else => return error.InvalidUtf8Byte, - }, - 0xF1...0xF3 => switch (c) { - 0x80...0xBF => {}, - else => return error.InvalidUtf8Byte, - }, - 0xF4 => switch (c) { - 0x80...0x8F => {}, - else => return error.InvalidUtf8Byte, - }, - else => return error.InvalidUtf8Byte, - } - p.state = .StringUtf8Byte3Of4; - }, - .StringUtf8Byte3Of4 => switch (c) { - 0x80...0xBF => p.state = .StringUtf8Byte4Of4, - else => return error.InvalidUtf8Byte, - }, - .StringUtf8Byte4Of4 => switch (c) { - 0x80...0xBF => p.state = .String, - else => return error.InvalidUtf8Byte, - }, - - .StringEscapeCharacter => switch (c) { - // NOTE: '/' is allowed as an escaped character but it also is allowed - // as unescaped according to the RFC. There is a reported errata which suggests - // removing the non-escaped variant but it makes more sense to simply disallow - // it as an escape code here. - // - // The current JSONTestSuite tests rely on both of this behaviour being present - // however, so we default to the status quo where both are accepted until this - // is further clarified. - '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => { - p.string_escapes.Some.size_diff -= 1; - p.state = .String; - p.string_last_was_high_surrogate = false; - }, - 'u' => { - p.state = .StringEscapeHexUnicode4; - }, - else => { - return error.InvalidEscapeCharacter; - }, - }, - - .StringEscapeHexUnicode4 => { - var codepoint: u21 = undefined; - switch (c) { - else => return error.InvalidUnicodeHexSymbol, - '0'...'9' => { - codepoint = c - '0'; - }, - 'A'...'F' => { - codepoint = c - 'A' + 10; - }, - 'a'...'f' => { - codepoint = c - 'a' + 10; - }, - } - p.state = .StringEscapeHexUnicode3; - p.string_unicode_codepoint = codepoint << 12; - }, - - .StringEscapeHexUnicode3 => { - var codepoint: u21 = undefined; - switch (c) { - else => return error.InvalidUnicodeHexSymbol, - '0'...'9' => { - codepoint = c - '0'; - }, - 'A'...'F' => { - codepoint = c - 'A' + 10; - }, - 'a'...'f' => { - codepoint = c - 'a' + 10; - }, - } - p.state = .StringEscapeHexUnicode2; - p.string_unicode_codepoint |= codepoint << 8; - }, - - .StringEscapeHexUnicode2 => { - var codepoint: u21 = undefined; - switch (c) { - else => return error.InvalidUnicodeHexSymbol, - '0'...'9' => { - codepoint = c - '0'; - }, - 'A'...'F' => { - codepoint = c - 'A' + 10; - }, - 'a'...'f' => { - codepoint = c - 'a' + 10; - }, - } - p.state = .StringEscapeHexUnicode1; - p.string_unicode_codepoint |= codepoint << 4; - }, - - .StringEscapeHexUnicode1 => { - var codepoint: u21 = undefined; - switch (c) { - else => return error.InvalidUnicodeHexSymbol, - '0'...'9' => { - codepoint = c - '0'; - }, - 'A'...'F' => { - codepoint = c - 'A' + 10; - }, - 'a'...'f' => { - codepoint = c - 'a' + 10; - }, - } - p.state = .String; - p.string_unicode_codepoint |= codepoint; - if (p.string_unicode_codepoint < 0xD800 or p.string_unicode_codepoint >= 0xE000) { - // not part of surrogate pair - p.string_escapes.Some.size_diff -= @as(isize, 6 - (std.unicode.utf8CodepointSequenceLength(p.string_unicode_codepoint) catch unreachable)); - p.string_last_was_high_surrogate = false; - } else if (p.string_unicode_codepoint < 0xDC00) { - // 'high' surrogate - // takes 3 bytes to encode a half surrogate pair into wtf8 - p.string_escapes.Some.size_diff -= 6 - 3; - p.string_last_was_high_surrogate = true; - } else { - // 'low' surrogate - p.string_escapes.Some.size_diff -= 6; - if (p.string_last_was_high_surrogate) { - // takes 4 bytes to encode a full surrogate pair into utf8 - // 3 bytes are already reserved by high surrogate - p.string_escapes.Some.size_diff -= -1; - } else { - // takes 3 bytes to encode a half surrogate pair into wtf8 - p.string_escapes.Some.size_diff -= -3; - } - p.string_last_was_high_surrogate = false; - } - p.string_unicode_codepoint = undefined; - }, - - .Number => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '0' => { - p.state = .NumberMaybeDotOrExponent; - }, - '1'...'9' => { - p.state = .NumberMaybeDigitOrDotOrExponent; - }, - else => { - return error.InvalidNumber; - }, - } - }, - - .NumberMaybeDotOrExponent => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '.' => { - p.number_is_integer = false; - p.state = .NumberFractionalRequired; - }, - 'e', 'E' => { - p.number_is_integer = false; - p.state = .NumberExponent; - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - p.number_is_integer = undefined; - return true; - }, - } - }, - - .NumberMaybeDigitOrDotOrExponent => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '.' => { - p.number_is_integer = false; - p.state = .NumberFractionalRequired; - }, - 'e', 'E' => { - p.number_is_integer = false; - p.state = .NumberExponent; - }, - '0'...'9' => { - // another digit - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - return true; - }, - } - }, - - .NumberFractionalRequired => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '0'...'9' => { - p.state = .NumberFractional; - }, - else => { - return error.InvalidNumber; - }, - } - }, - - .NumberFractional => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '0'...'9' => { - // another digit - }, - 'e', 'E' => { - p.number_is_integer = false; - p.state = .NumberExponent; - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - return true; - }, - } - }, - - .NumberMaybeExponent => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - 'e', 'E' => { - p.number_is_integer = false; - p.state = .NumberExponent; - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - return true; - }, - } - }, - - .NumberExponent => switch (c) { - '-', '+' => { - p.complete = false; - p.state = .NumberExponentDigitsRequired; - }, - '0'...'9' => { - p.complete = p.after_value_state == .TopLevelEnd; - p.state = .NumberExponentDigits; - }, - else => { - return error.InvalidNumber; - }, - }, - - .NumberExponentDigitsRequired => switch (c) { - '0'...'9' => { - p.complete = p.after_value_state == .TopLevelEnd; - p.state = .NumberExponentDigits; - }, - else => { - return error.InvalidNumber; - }, - }, - - .NumberExponentDigits => { - p.complete = p.after_value_state == .TopLevelEnd; - switch (c) { - '0'...'9' => { - // another digit - }, - else => { - p.state = p.after_value_state; - token.* = .{ - .Number = .{ - .count = p.count, - .is_integer = p.number_is_integer, - }, - }; - return true; - }, - } - }, - - .TrueLiteral1 => switch (c) { - 'r' => p.state = .TrueLiteral2, - else => return error.InvalidLiteral, - }, - - .TrueLiteral2 => switch (c) { - 'u' => p.state = .TrueLiteral3, - else => return error.InvalidLiteral, - }, - - .TrueLiteral3 => switch (c) { - 'e' => { - p.state = p.after_value_state; - p.complete = p.state == .TopLevelEnd; - token.* = Token.True; - }, - else => { - return error.InvalidLiteral; - }, - }, - - .FalseLiteral1 => switch (c) { - 'a' => p.state = .FalseLiteral2, - else => return error.InvalidLiteral, - }, - - .FalseLiteral2 => switch (c) { - 'l' => p.state = .FalseLiteral3, - else => return error.InvalidLiteral, - }, - - .FalseLiteral3 => switch (c) { - 's' => p.state = .FalseLiteral4, - else => return error.InvalidLiteral, - }, - - .FalseLiteral4 => switch (c) { - 'e' => { - p.state = p.after_value_state; - p.complete = p.state == .TopLevelEnd; - token.* = Token.False; - }, - else => { - return error.InvalidLiteral; - }, - }, - - .NullLiteral1 => switch (c) { - 'u' => p.state = .NullLiteral2, - else => return error.InvalidLiteral, - }, - - .NullLiteral2 => switch (c) { - 'l' => p.state = .NullLiteral3, - else => return error.InvalidLiteral, - }, - - .NullLiteral3 => switch (c) { - 'l' => { - p.state = p.after_value_state; - p.complete = p.state == .TopLevelEnd; - token.* = Token.Null; - }, - else => { - return error.InvalidLiteral; - }, - }, - } - - return false; - } -}; - -/// A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens. -pub const TokenStream = struct { - i: usize, - slice: []const u8, - parser: StreamingParser, - token: ?Token, - - pub const Error = StreamingParser.Error || error{UnexpectedEndOfJson}; - - pub fn init(slice: []const u8) TokenStream { - return TokenStream{ - .i = 0, - .slice = slice, - .parser = StreamingParser.init(), - .token = null, - }; - } - - fn stackUsed(self: *TokenStream) usize { - return self.parser.stack.len + if (self.token != null) @as(usize, 1) else 0; - } - - pub fn next(self: *TokenStream) Error!?Token { - if (self.token) |token| { - self.token = null; - return token; - } - - var t1: ?Token = undefined; - var t2: ?Token = undefined; - - while (self.i < self.slice.len) { - try self.parser.feed(self.slice[self.i], &t1, &t2); - self.i += 1; - - if (t1) |token| { - self.token = t2; - return token; - } - } - - // Without this a bare number fails, the streaming parser doesn't know the input ended - try self.parser.feed(' ', &t1, &t2); - self.i += 1; - - if (t1) |token| { - return token; - } else if (self.parser.complete) { - return null; - } else { - return error.UnexpectedEndOfJson; - } - } -}; - -/// Validate a JSON string. This does not limit number precision so a decoder may not necessarily -/// be able to decode the string even if this returns true. -pub fn validate(s: []const u8) bool { - var p = StreamingParser.init(); - - for (s) |c| { - var token1: ?Token = undefined; - var token2: ?Token = undefined; - - p.feed(c, &token1, &token2) catch { - return false; - }; - } - - return p.complete; -} - -const Allocator = std.mem.Allocator; -const ArenaAllocator = std.heap.ArenaAllocator; -const ArrayList = std.ArrayList; -const StringArrayHashMap = std.StringArrayHashMap; - -pub const ValueTree = struct { - arena: *ArenaAllocator, - root: Value, - - pub fn deinit(self: *ValueTree) void { - self.arena.deinit(); - self.arena.child_allocator.destroy(self.arena); - } -}; - -pub const ObjectMap = StringArrayHashMap(Value); -pub const Array = ArrayList(Value); - -/// Represents a JSON value -/// Currently only supports numbers that fit into i64 or f64. -pub const Value = union(enum) { - Null, - Bool: bool, - Integer: i64, - Float: f64, - NumberString: []const u8, - String: []const u8, - Array: Array, - Object: ObjectMap, - - pub fn jsonStringify( - value: @This(), - options: StringifyOptions, - out_stream: anytype, - ) @TypeOf(out_stream).Error!void { - switch (value) { - .Null => try stringify(null, options, out_stream), - .Bool => |inner| try stringify(inner, options, out_stream), - .Integer => |inner| try stringify(inner, options, out_stream), - .Float => |inner| try stringify(inner, options, out_stream), - .NumberString => |inner| try out_stream.writeAll(inner), - .String => |inner| try stringify(inner, options, out_stream), - .Array => |inner| try stringify(inner.items, options, out_stream), - .Object => |inner| { - try out_stream.writeByte('{'); - var field_output = false; - var child_options = options; - if (child_options.whitespace) |*child_whitespace| { - child_whitespace.indent_level += 1; - } - var it = inner.iterator(); - while (it.next()) |entry| { - if (!field_output) { - field_output = true; - } else { - try out_stream.writeByte(','); - } - if (child_options.whitespace) |child_whitespace| { - try child_whitespace.outputIndent(out_stream); - } - - try stringify(entry.key_ptr.*, options, out_stream); - try out_stream.writeByte(':'); - if (child_options.whitespace) |child_whitespace| { - if (child_whitespace.separator) { - try out_stream.writeByte(' '); - } - } - try stringify(entry.value_ptr.*, child_options, out_stream); - } - if (field_output) { - if (options.whitespace) |whitespace| { - try whitespace.outputIndent(out_stream); - } - } - try out_stream.writeByte('}'); - }, - } - } - - pub fn dump(self: Value) void { - std.debug.getStderrMutex().lock(); - defer std.debug.getStderrMutex().unlock(); - - const stderr = std.io.getStdErr().writer(); - std.json.stringify(self, std.json.StringifyOptions{ .whitespace = null }, stderr) catch return; - } -}; - -/// parse tokens from a stream, returning `false` if they do not decode to `value` -fn parsesTo(comptime T: type, value: T, tokens: *TokenStream, options: ParseOptions) !bool { - // TODO: should be able to write this function to not require an allocator - const tmp = try parse(T, tokens, options); - defer parseFree(T, tmp, options); - - return parsedEqual(tmp, value); -} - -/// Returns if a value returned by `parse` is deep-equal to another value -fn parsedEqual(a: anytype, b: @TypeOf(a)) bool { - switch (@typeInfo(@TypeOf(a))) { - .Optional => { - if (a == null and b == null) return true; - if (a == null or b == null) return false; - return parsedEqual(a.?, b.?); - }, - .Union => |info| { - if (info.tag_type) |UnionTag| { - const tag_a = std.meta.activeTag(a); - const tag_b = std.meta.activeTag(b); - if (tag_a != tag_b) return false; - - inline for (info.fields) |field_info| { - if (@field(UnionTag, field_info.name) == tag_a) { - return parsedEqual(@field(a, field_info.name), @field(b, field_info.name)); - } - } - return false; - } else { - unreachable; - } - }, - .Array => { - for (a, 0..) |e, i| - if (!parsedEqual(e, b[i])) return false; - return true; - }, - .Struct => |info| { - inline for (info.fields) |field_info| { - if (!parsedEqual(@field(a, field_info.name), @field(b, field_info.name))) return false; - } - return true; - }, - .Pointer => |ptrInfo| switch (ptrInfo.size) { - .One => return parsedEqual(a.*, b.*), - .Slice => { - if (a.len != b.len) return false; - for (a, 0..) |e, i| - if (!parsedEqual(e, b[i])) return false; - return true; - }, - .Many, .C => unreachable, - }, - else => return a == b, - } - unreachable; -} - -pub const ParseOptions = struct { - allocator: ?Allocator = null, - - /// Behaviour when a duplicate field is encountered. - duplicate_field_behavior: enum { - UseFirst, - Error, - UseLast, - } = .Error, - - /// If false, finding an unknown field returns an error. - ignore_unknown_fields: bool = false, - - allow_trailing_data: bool = false, -}; - -const SkipValueError = error{UnexpectedJsonDepth} || TokenStream.Error; - -fn skipValue(tokens: *TokenStream) SkipValueError!void { - const original_depth = tokens.stackUsed(); - - // Return an error if no value is found - _ = try tokens.next(); - if (tokens.stackUsed() < original_depth) return error.UnexpectedJsonDepth; - if (tokens.stackUsed() == original_depth) return; - - while (try tokens.next()) |_| { - if (tokens.stackUsed() == original_depth) return; - } -} - -fn ParseInternalError(comptime T: type) type { - // `inferred_types` is used to avoid infinite recursion for recursive type definitions. - const inferred_types = [_]type{}; - return ParseInternalErrorImpl(T, &inferred_types); -} - -fn ParseInternalErrorImpl(comptime T: type, comptime inferred_types: []const type) type { - for (inferred_types) |ty| { - if (T == ty) return error{}; - } - - switch (@typeInfo(T)) { - .Bool => return error{UnexpectedToken}, - .Float, .ComptimeFloat => return error{UnexpectedToken} || std.fmt.ParseFloatError, - .Int, .ComptimeInt => { - return error{ UnexpectedToken, InvalidNumber, Overflow } || - std.fmt.ParseIntError || std.fmt.ParseFloatError; - }, - .Optional => |optionalInfo| { - return ParseInternalErrorImpl(optionalInfo.child, inferred_types ++ [_]type{T}); - }, - .Enum => return error{ UnexpectedToken, InvalidEnumTag } || std.fmt.ParseIntError || - std.meta.IntToEnumError || std.meta.IntToEnumError, - .Union => |unionInfo| { - if (unionInfo.tag_type) |_| { - var errors = error{NoUnionMembersMatched}; - for (unionInfo.fields) |u_field| { - errors = errors || ParseInternalErrorImpl(u_field.type, inferred_types ++ [_]type{T}); - } - return errors; - } else { - @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'"); - } - }, - .Struct => |structInfo| { - var errors = error{ - DuplicateJSONField, - UnexpectedEndOfJson, - UnexpectedToken, - UnexpectedValue, - UnknownField, - MissingField, - } || SkipValueError || TokenStream.Error; - for (structInfo.fields) |field| { - errors = errors || ParseInternalErrorImpl(field.type, inferred_types ++ [_]type{T}); - } - return errors; - }, - .Array => |arrayInfo| { - return error{ UnexpectedEndOfJson, UnexpectedToken, LengthMismatch } || TokenStream.Error || - UnescapeValidStringError || - ParseInternalErrorImpl(arrayInfo.child, inferred_types ++ [_]type{T}); - }, - .Vector => |vecInfo| { - return error{ UnexpectedEndOfJson, UnexpectedToken, LengthMismatch } || TokenStream.Error || - UnescapeValidStringError || - ParseInternalErrorImpl(vecInfo.child, inferred_types ++ [_]type{T}); - }, - .Pointer => |ptrInfo| { - var errors = error{AllocatorRequired} || std.mem.Allocator.Error; - switch (ptrInfo.size) { - .One => { - return errors || ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T}); - }, - .Slice => { - return errors || error{ UnexpectedEndOfJson, UnexpectedToken } || - ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T}) || - UnescapeValidStringError || TokenStream.Error; - }, - else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), - } - }, - else => return error{}, - } - unreachable; -} - -fn parseInternalArray( - comptime T: type, - comptime Elt: type, - comptime arr_len: usize, - tokens: *TokenStream, - options: ParseOptions, -) ParseInternalError(T)!T { - var r: T = undefined; - var i: usize = 0; - var child_options = options; - child_options.allow_trailing_data = true; - errdefer { - // Without the r.len check `r[i]` is not allowed - if (arr_len > 0) while (true) : (i -= 1) { - parseFree(Elt, r[i], options); - if (i == 0) break; - }; - } - if (arr_len > 0) while (i < arr_len) : (i += 1) { - r[i] = try parse(Elt, tokens, child_options); - }; - const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson; - switch (tok) { - .ArrayEnd => {}, - else => return error.UnexpectedToken, - } - return r; -} - -fn parseInternal( - comptime T: type, - token: Token, - tokens: *TokenStream, - options: ParseOptions, -) ParseInternalError(T)!T { - switch (@typeInfo(T)) { - .Bool => { - return switch (token) { - .True => true, - .False => false, - else => error.UnexpectedToken, - }; - }, - .Float, .ComptimeFloat => { - switch (token) { - .Number => |numberToken| return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1)), - .String => |stringToken| return try std.fmt.parseFloat(T, stringToken.slice(tokens.slice, tokens.i - 1)), - else => return error.UnexpectedToken, - } - }, - .Int, .ComptimeInt => { - switch (token) { - .Number => |numberToken| { - if (numberToken.is_integer) - return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10); - const float = try std.fmt.parseFloat(f128, numberToken.slice(tokens.slice, tokens.i - 1)); - if (@round(float) != float) return error.InvalidNumber; - if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow; - return @floatToInt(T, float); - }, - .String => |stringToken| { - return std.fmt.parseInt(T, stringToken.slice(tokens.slice, tokens.i - 1), 10) catch |err| { - switch (err) { - error.Overflow => return err, - error.InvalidCharacter => { - const float = try std.fmt.parseFloat(f128, stringToken.slice(tokens.slice, tokens.i - 1)); - if (@round(float) != float) return error.InvalidNumber; - if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow; - return @floatToInt(T, float); - }, - } - }; - }, - else => return error.UnexpectedToken, - } - }, - .Optional => |optionalInfo| { - if (token == .Null) { - return null; - } else { - return try parseInternal(optionalInfo.child, token, tokens, options); - } - }, - .Enum => |enumInfo| { - switch (token) { - .Number => |numberToken| { - if (!numberToken.is_integer) return error.UnexpectedToken; - const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10); - return try std.meta.intToEnum(T, n); - }, - .String => |stringToken| { - const source_slice = stringToken.slice(tokens.slice, tokens.i - 1); - switch (stringToken.escapes) { - .None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag, - .Some => { - inline for (enumInfo.fields) |field| { - if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) { - return @field(T, field.name); - } - } - return error.InvalidEnumTag; - }, - } - }, - else => return error.UnexpectedToken, - } - }, - .Union => |unionInfo| { - if (unionInfo.tag_type) |_| { - // try each of the union fields until we find one that matches - inline for (unionInfo.fields) |u_field| { - // take a copy of tokens so we can withhold mutations until success - var tokens_copy = tokens.*; - if (parseInternal(u_field.type, token, &tokens_copy, options)) |value| { - tokens.* = tokens_copy; - return @unionInit(T, u_field.name, value); - } else |err| { - // Bubble up error.OutOfMemory - // Parsing some types won't have OutOfMemory in their - // error-sets, for the condition to be valid, merge it in. - if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err; - // Bubble up AllocatorRequired, as it indicates missing option - if (@as(@TypeOf(err) || error{AllocatorRequired}, err) == error.AllocatorRequired) return err; - // otherwise continue through the `inline for` - } - } - return error.NoUnionMembersMatched; - } else { - @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'"); - } - }, - .Struct => |structInfo| { - if (structInfo.is_tuple) { - switch (token) { - .ArrayBegin => {}, - else => return error.UnexpectedToken, - } - var r: T = undefined; - var child_options = options; - child_options.allow_trailing_data = true; - var fields_seen: usize = 0; - errdefer { - inline for (0..structInfo.fields.len) |i| { - if (i < fields_seen) { - parseFree(structInfo.fields[i].type, r[i], options); - } - } - } - inline for (0..structInfo.fields.len) |i| { - r[i] = try parse(structInfo.fields[i].type, tokens, child_options); - fields_seen = i + 1; - } - const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson; - switch (tok) { - .ArrayEnd => {}, - else => return error.UnexpectedToken, - } - return r; - } - - switch (token) { - .ObjectBegin => {}, - else => return error.UnexpectedToken, - } - var r: T = undefined; - var fields_seen = [_]bool{false} ** structInfo.fields.len; - errdefer { - inline for (structInfo.fields, 0..) |field, i| { - if (fields_seen[i] and !field.is_comptime) { - parseFree(field.type, @field(r, field.name), options); - } - } - } - - while (true) { - switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) { - .ObjectEnd => break, - .String => |stringToken| { - const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1); - var child_options = options; - child_options.allow_trailing_data = true; - var found = false; - inline for (structInfo.fields, 0..) |field, i| { - if (switch (stringToken.escapes) { - .None => mem.eql(u8, field.name, key_source_slice), - .Some => (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)), - }) { - if (fields_seen[i]) { - switch (options.duplicate_field_behavior) { - .UseFirst => { - // unconditionally ignore value. for comptime fields, this skips check against default_value - parseFree(field.type, try parse(field.type, tokens, child_options), child_options); - found = true; - break; - }, - .Error => return error.DuplicateJSONField, - .UseLast => { - if (!field.is_comptime) { - parseFree(field.type, @field(r, field.name), child_options); - } - fields_seen[i] = false; - }, - } - } - if (field.is_comptime) { - if (!try parsesTo(field.type, @ptrCast(*align(1) const field.type, field.default_value.?).*, tokens, child_options)) { - return error.UnexpectedValue; - } - } else { - @field(r, field.name) = try parse(field.type, tokens, child_options); - } - fields_seen[i] = true; - found = true; - break; - } - } - if (!found) { - if (options.ignore_unknown_fields) { - try skipValue(tokens); - continue; - } else { - return error.UnknownField; - } - } - }, - else => return error.UnexpectedToken, - } - } - inline for (structInfo.fields, 0..) |field, i| { - if (!fields_seen[i]) { - if (field.default_value) |default_ptr| { - if (!field.is_comptime) { - const default = @ptrCast(*align(1) const field.type, default_ptr).*; - @field(r, field.name) = default; - } - } else { - return error.MissingField; - } - } - } - return r; - }, - .Array => |arrayInfo| { - switch (token) { - .ArrayBegin => { - const len = @typeInfo(T).Array.len; - return parseInternalArray(T, arrayInfo.child, len, tokens, options); - }, - .String => |stringToken| { - if (arrayInfo.child != u8) return error.UnexpectedToken; - var r: T = undefined; - const source_slice = stringToken.slice(tokens.slice, tokens.i - 1); - if (r.len != stringToken.decodedLength()) return error.LengthMismatch; - switch (stringToken.escapes) { - .None => @memcpy(r[0..source_slice.len], source_slice), - .Some => try unescapeValidString(&r, source_slice), - } - return r; - }, - else => return error.UnexpectedToken, - } - }, - .Vector => |vecInfo| { - switch (token) { - .ArrayBegin => { - const len = @typeInfo(T).Vector.len; - return parseInternalArray(T, vecInfo.child, len, tokens, options); - }, - else => return error.UnexpectedToken, - } - }, - .Pointer => |ptrInfo| { - const allocator = options.allocator orelse return error.AllocatorRequired; - switch (ptrInfo.size) { - .One => { - const r: *ptrInfo.child = try allocator.create(ptrInfo.child); - errdefer allocator.destroy(r); - r.* = try parseInternal(ptrInfo.child, token, tokens, options); - return r; - }, - .Slice => { - switch (token) { - .ArrayBegin => { - var arraylist = std.ArrayList(ptrInfo.child).init(allocator); - errdefer { - while (arraylist.popOrNull()) |v| { - parseFree(ptrInfo.child, v, options); - } - arraylist.deinit(); - } - - while (true) { - const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson; - switch (tok) { - .ArrayEnd => break, - else => {}, - } - - try arraylist.ensureUnusedCapacity(1); - const v = try parseInternal(ptrInfo.child, tok, tokens, options); - arraylist.appendAssumeCapacity(v); - } - - if (ptrInfo.sentinel) |some| { - const sentinel_value = @ptrCast(*align(1) const ptrInfo.child, some).*; - return try arraylist.toOwnedSliceSentinel(sentinel_value); - } - - return try arraylist.toOwnedSlice(); - }, - .String => |stringToken| { - if (ptrInfo.child != u8) return error.UnexpectedToken; - const source_slice = stringToken.slice(tokens.slice, tokens.i - 1); - const len = stringToken.decodedLength(); - const output = if (ptrInfo.sentinel) |sentinel_ptr| - try allocator.allocSentinel(u8, len, @ptrCast(*const u8, sentinel_ptr).*) - else - try allocator.alloc(u8, len); - errdefer allocator.free(output); - switch (stringToken.escapes) { - .None => @memcpy(output[0..source_slice.len], source_slice), - .Some => try unescapeValidString(output, source_slice), - } - - return output; - }, - else => return error.UnexpectedToken, - } - }, - else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), - } - }, - else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"), - } - unreachable; -} - -pub fn ParseError(comptime T: type) type { - return ParseInternalError(T) || error{UnexpectedEndOfJson} || TokenStream.Error; -} - -pub fn parse(comptime T: type, tokens: *TokenStream, options: ParseOptions) ParseError(T)!T { - const token = (try tokens.next()) orelse return error.UnexpectedEndOfJson; - const r = try parseInternal(T, token, tokens, options); - errdefer parseFree(T, r, options); - if (!options.allow_trailing_data) { - if ((try tokens.next()) != null) unreachable; - assert(tokens.i >= tokens.slice.len); - } - return r; -} - -/// Releases resources created by `parse`. -/// Should be called with the same type and `ParseOptions` that were passed to `parse` -pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void { - switch (@typeInfo(T)) { - .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {}, - .Optional => { - if (value) |v| { - return parseFree(@TypeOf(v), v, options); - } - }, - .Union => |unionInfo| { - if (unionInfo.tag_type) |UnionTagType| { - inline for (unionInfo.fields) |u_field| { - if (value == @field(UnionTagType, u_field.name)) { - parseFree(u_field.type, @field(value, u_field.name), options); - break; - } - } - } else { - unreachable; - } - }, - .Struct => |structInfo| { - inline for (structInfo.fields) |field| { - if (!field.is_comptime) { - var should_free = true; - if (field.default_value) |default| { - switch (@typeInfo(field.type)) { - // We must not attempt to free pointers to struct default values - .Pointer => |fieldPtrInfo| { - const field_value = @field(value, field.name); - const field_ptr = switch (fieldPtrInfo.size) { - .One => field_value, - .Slice => field_value.ptr, - else => unreachable, // Other pointer types are not parseable - }; - const field_addr = @ptrToInt(field_ptr); - - const casted_default = @ptrCast(*const field.type, @alignCast(@alignOf(field.type), default)).*; - const default_ptr = switch (fieldPtrInfo.size) { - .One => casted_default, - .Slice => casted_default.ptr, - else => unreachable, // Other pointer types are not parseable - }; - const default_addr = @ptrToInt(default_ptr); - - if (field_addr == default_addr) { - should_free = false; - } - }, - else => {}, - } - } - if (should_free) { - parseFree(field.type, @field(value, field.name), options); - } - } - } - }, - .Array => |arrayInfo| { - for (value) |v| { - parseFree(arrayInfo.child, v, options); - } - }, - .Vector => |vecInfo| { - var i: usize = 0; - var v_len: usize = @typeInfo(@TypeOf(value)).Vector.len; - while (i < v_len) : (i += 1) { - parseFree(vecInfo.child, value[i], options); - } - }, - .Pointer => |ptrInfo| { - const allocator = options.allocator orelse unreachable; - switch (ptrInfo.size) { - .One => { - parseFree(ptrInfo.child, value.*, options); - allocator.destroy(value); - }, - .Slice => { - for (value) |v| { - parseFree(ptrInfo.child, v, options); - } - allocator.free(value); - }, - else => unreachable, - } - }, - else => unreachable, - } -} - -/// A non-stream JSON parser which constructs a tree of Value's. -pub const Parser = struct { - allocator: Allocator, - state: State, - copy_strings: bool, - // Stores parent nodes and un-combined Values. - stack: Array, - - const State = enum { - ObjectKey, - ObjectValue, - ArrayValue, - Simple, - }; - - pub fn init(allocator: Allocator, copy_strings: bool) Parser { - return Parser{ - .allocator = allocator, - .state = .Simple, - .copy_strings = copy_strings, - .stack = Array.init(allocator), - }; - } - - pub fn deinit(p: *Parser) void { - p.stack.deinit(); - } - - pub fn reset(p: *Parser) void { - p.state = .Simple; - p.stack.shrinkRetainingCapacity(0); - } - - pub fn parse(p: *Parser, input: []const u8) !ValueTree { - var s = TokenStream.init(input); - - var arena = try p.allocator.create(ArenaAllocator); - errdefer p.allocator.destroy(arena); - - arena.* = ArenaAllocator.init(p.allocator); - errdefer arena.deinit(); - - const allocator = arena.allocator(); - - while (try s.next()) |token| { - try p.transition(allocator, input, s.i - 1, token); - } - - debug.assert(p.stack.items.len == 1); - - return ValueTree{ - .arena = arena, - .root = p.stack.items[0], - }; - } - - // Even though p.allocator exists, we take an explicit allocator so that allocation state - // can be cleaned up on error correctly during a `parse` on call. - fn transition(p: *Parser, allocator: Allocator, input: []const u8, i: usize, token: Token) !void { - switch (p.state) { - .ObjectKey => switch (token) { - .ObjectEnd => { - if (p.stack.items.len == 1) { - return; - } - - var value = p.stack.pop(); - try p.pushToParent(&value); - }, - .String => |s| { - try p.stack.append(try p.parseString(allocator, s, input, i)); - p.state = .ObjectValue; - }, - else => { - // The streaming parser would return an error eventually. - // To prevent invalid state we return an error now. - // TODO make the streaming parser return an error as soon as it encounters an invalid object key - return error.InvalidLiteral; - }, - }, - .ObjectValue => { - var object = &p.stack.items[p.stack.items.len - 2].Object; - var key = p.stack.items[p.stack.items.len - 1].String; - - switch (token) { - .ObjectBegin => { - try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); - p.state = .ObjectKey; - }, - .ArrayBegin => { - try p.stack.append(Value{ .Array = Array.init(allocator) }); - p.state = .ArrayValue; - }, - .String => |s| { - try object.put(key, try p.parseString(allocator, s, input, i)); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .Number => |n| { - try object.put(key, try p.parseNumber(n, input, i)); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .True => { - try object.put(key, Value{ .Bool = true }); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .False => { - try object.put(key, Value{ .Bool = false }); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .Null => { - try object.put(key, Value.Null); - _ = p.stack.pop(); - p.state = .ObjectKey; - }, - .ObjectEnd, .ArrayEnd => { - unreachable; - }, - } - }, - .ArrayValue => { - var array = &p.stack.items[p.stack.items.len - 1].Array; - - switch (token) { - .ArrayEnd => { - if (p.stack.items.len == 1) { - return; - } - - var value = p.stack.pop(); - try p.pushToParent(&value); - }, - .ObjectBegin => { - try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); - p.state = .ObjectKey; - }, - .ArrayBegin => { - try p.stack.append(Value{ .Array = Array.init(allocator) }); - p.state = .ArrayValue; - }, - .String => |s| { - try array.append(try p.parseString(allocator, s, input, i)); - }, - .Number => |n| { - try array.append(try p.parseNumber(n, input, i)); - }, - .True => { - try array.append(Value{ .Bool = true }); - }, - .False => { - try array.append(Value{ .Bool = false }); - }, - .Null => { - try array.append(Value.Null); - }, - .ObjectEnd => { - unreachable; - }, - } - }, - .Simple => switch (token) { - .ObjectBegin => { - try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); - p.state = .ObjectKey; - }, - .ArrayBegin => { - try p.stack.append(Value{ .Array = Array.init(allocator) }); - p.state = .ArrayValue; - }, - .String => |s| { - try p.stack.append(try p.parseString(allocator, s, input, i)); - }, - .Number => |n| { - try p.stack.append(try p.parseNumber(n, input, i)); - }, - .True => { - try p.stack.append(Value{ .Bool = true }); - }, - .False => { - try p.stack.append(Value{ .Bool = false }); - }, - .Null => { - try p.stack.append(Value.Null); - }, - .ObjectEnd, .ArrayEnd => { - unreachable; - }, - }, - } - } - - fn pushToParent(p: *Parser, value: *const Value) !void { - switch (p.stack.items[p.stack.items.len - 1]) { - // Object Parent -> [ ..., object, <key>, value ] - Value.String => |key| { - _ = p.stack.pop(); - - var object = &p.stack.items[p.stack.items.len - 1].Object; - try object.put(key, value.*); - p.state = .ObjectKey; - }, - // Array Parent -> [ ..., <array>, value ] - Value.Array => |*array| { - try array.append(value.*); - p.state = .ArrayValue; - }, - else => { - unreachable; - }, - } - } - - fn parseString(p: *Parser, allocator: Allocator, s: std.meta.TagPayload(Token, Token.String), input: []const u8, i: usize) !Value { - const slice = s.slice(input, i); - switch (s.escapes) { - .None => return Value{ .String = if (p.copy_strings) try allocator.dupe(u8, slice) else slice }, - .Some => { - const output = try allocator.alloc(u8, s.decodedLength()); - errdefer allocator.free(output); - try unescapeValidString(output, slice); - return Value{ .String = output }; - }, - } - } - - fn parseNumber(p: *Parser, n: std.meta.TagPayload(Token, Token.Number), input: []const u8, i: usize) !Value { - _ = p; - return if (n.is_integer) - Value{ - .Integer = std.fmt.parseInt(i64, n.slice(input, i), 10) catch |e| switch (e) { - error.Overflow => return Value{ .NumberString = n.slice(input, i) }, - error.InvalidCharacter => |err| return err, - }, - } - else - Value{ .Float = try std.fmt.parseFloat(f64, n.slice(input, i)) }; - } -}; - -pub const UnescapeValidStringError = error{InvalidUnicodeHexSymbol}; - -/// Unescape a JSON string -/// Only to be used on strings already validated by the parser -/// (note the unreachable statements and lack of bounds checking) -pub fn unescapeValidString(output: []u8, input: []const u8) UnescapeValidStringError!void { - var inIndex: usize = 0; - var outIndex: usize = 0; - - while (inIndex < input.len) { - if (input[inIndex] != '\\') { - // not an escape sequence - output[outIndex] = input[inIndex]; - inIndex += 1; - outIndex += 1; - } else if (input[inIndex + 1] != 'u') { - // a simple escape sequence - output[outIndex] = @as(u8, switch (input[inIndex + 1]) { - '\\' => '\\', - '/' => '/', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - 'f' => 12, - 'b' => 8, - '"' => '"', - else => unreachable, - }); - inIndex += 2; - outIndex += 1; - } else { - // a unicode escape sequence - const firstCodeUnit = std.fmt.parseInt(u16, input[inIndex + 2 .. inIndex + 6], 16) catch unreachable; - - // guess optimistically that it's not a surrogate pair - if (std.unicode.utf8Encode(firstCodeUnit, output[outIndex..])) |byteCount| { - outIndex += byteCount; - inIndex += 6; - } else |err| { - // it might be a surrogate pair - if (err != error.Utf8CannotEncodeSurrogateHalf) { - return error.InvalidUnicodeHexSymbol; - } - // check if a second code unit is present - if (inIndex + 7 >= input.len or input[inIndex + 6] != '\\' or input[inIndex + 7] != 'u') { - return error.InvalidUnicodeHexSymbol; - } - - const secondCodeUnit = std.fmt.parseInt(u16, input[inIndex + 8 .. inIndex + 12], 16) catch unreachable; - - const utf16le_seq = [2]u16{ - mem.nativeToLittle(u16, firstCodeUnit), - mem.nativeToLittle(u16, secondCodeUnit), - }; - if (std.unicode.utf16leToUtf8(output[outIndex..], &utf16le_seq)) |byteCount| { - outIndex += byteCount; - inIndex += 12; - } else |_| { - return error.InvalidUnicodeHexSymbol; - } - } - } - } - assert(outIndex == output.len); -} - -pub const StringifyOptions = struct { - pub const Whitespace = struct { - /// How many indentation levels deep are we? - indent_level: usize = 0, - - /// What character(s) should be used for indentation? - indent: union(enum) { - Space: u8, - Tab: void, - None: void, - } = .{ .Space = 4 }, - - /// After a colon, should whitespace be inserted? - separator: bool = true, - - pub fn outputIndent( - whitespace: @This(), - out_stream: anytype, - ) @TypeOf(out_stream).Error!void { - var char: u8 = undefined; - var n_chars: usize = undefined; - switch (whitespace.indent) { - .Space => |n_spaces| { - char = ' '; - n_chars = n_spaces; - }, - .Tab => { - char = '\t'; - n_chars = 1; - }, - .None => return, - } - try out_stream.writeByte('\n'); - n_chars *= whitespace.indent_level; - try out_stream.writeByteNTimes(char, n_chars); - } - }; - - /// Controls the whitespace emitted - whitespace: ?Whitespace = null, - - /// Should optional fields with null value be written? - emit_null_optional_fields: bool = true, - - string: StringOptions = StringOptions{ .String = .{} }, - - /// Should []u8 be serialised as a string? or an array? - pub const StringOptions = union(enum) { - Array, - String: StringOutputOptions, - - /// String output options - const StringOutputOptions = struct { - /// Should '/' be escaped in strings? - escape_solidus: bool = false, - - /// Should unicode characters be escaped in strings? - escape_unicode: bool = false, - }; - }; -}; - -fn outputUnicodeEscape( - codepoint: u21, - out_stream: anytype, -) !void { - if (codepoint <= 0xFFFF) { - // If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF), - // then it may be represented as a six-character sequence: a reverse solidus, followed - // by the lowercase letter u, followed by four hexadecimal digits that encode the character's code point. - try out_stream.writeAll("\\u"); - try std.fmt.formatIntValue(codepoint, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream); - } else { - assert(codepoint <= 0x10FFFF); - // To escape an extended character that is not in the Basic Multilingual Plane, - // the character is represented as a 12-character sequence, encoding the UTF-16 surrogate pair. - const high = @intCast(u16, (codepoint - 0x10000) >> 10) + 0xD800; - const low = @intCast(u16, codepoint & 0x3FF) + 0xDC00; - try out_stream.writeAll("\\u"); - try std.fmt.formatIntValue(high, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream); - try out_stream.writeAll("\\u"); - try std.fmt.formatIntValue(low, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream); - } -} - -/// Write `string` to `writer` as a JSON encoded string. -pub fn encodeJsonString(string: []const u8, options: StringifyOptions, writer: anytype) !void { - try writer.writeByte('\"'); - try encodeJsonStringChars(string, options, writer); - try writer.writeByte('\"'); -} - -/// Write `chars` to `writer` as JSON encoded string characters. -pub fn encodeJsonStringChars(chars: []const u8, options: StringifyOptions, writer: anytype) !void { - var i: usize = 0; - while (i < chars.len) : (i += 1) { - switch (chars[i]) { - // normal ascii character - 0x20...0x21, 0x23...0x2E, 0x30...0x5B, 0x5D...0x7F => |c| try writer.writeByte(c), - // only 2 characters that *must* be escaped - '\\' => try writer.writeAll("\\\\"), - '\"' => try writer.writeAll("\\\""), - // solidus is optional to escape - '/' => { - if (options.string.String.escape_solidus) { - try writer.writeAll("\\/"); - } else { - try writer.writeByte('/'); - } - }, - // control characters with short escapes - // TODO: option to switch between unicode and 'short' forms? - 0x8 => try writer.writeAll("\\b"), - 0xC => try writer.writeAll("\\f"), - '\n' => try writer.writeAll("\\n"), - '\r' => try writer.writeAll("\\r"), - '\t' => try writer.writeAll("\\t"), - else => { - const ulen = std.unicode.utf8ByteSequenceLength(chars[i]) catch unreachable; - // control characters (only things left with 1 byte length) should always be printed as unicode escapes - if (ulen == 1 or options.string.String.escape_unicode) { - const codepoint = std.unicode.utf8Decode(chars[i..][0..ulen]) catch unreachable; - try outputUnicodeEscape(codepoint, writer); - } else { - try writer.writeAll(chars[i..][0..ulen]); - } - i += ulen - 1; - }, - } - } -} - -pub fn stringify( - value: anytype, - options: StringifyOptions, - out_stream: anytype, -) !void { - const T = @TypeOf(value); - switch (@typeInfo(T)) { - .Float, .ComptimeFloat => { - return std.fmt.formatFloatScientific(value, std.fmt.FormatOptions{}, out_stream); - }, - .Int, .ComptimeInt => { - return std.fmt.formatIntValue(value, "", std.fmt.FormatOptions{}, out_stream); - }, - .Bool => { - return out_stream.writeAll(if (value) "true" else "false"); - }, - .Null => { - return out_stream.writeAll("null"); - }, - .Optional => { - if (value) |payload| { - return try stringify(payload, options, out_stream); - } else { - return try stringify(null, options, out_stream); - } - }, - .Enum => { - if (comptime std.meta.trait.hasFn("jsonStringify")(T)) { - return value.jsonStringify(options, out_stream); - } - - @compileError("Unable to stringify enum '" ++ @typeName(T) ++ "'"); - }, - .Union => { - if (comptime std.meta.trait.hasFn("jsonStringify")(T)) { - return value.jsonStringify(options, out_stream); - } - - const info = @typeInfo(T).Union; - if (info.tag_type) |UnionTagType| { - inline for (info.fields) |u_field| { - if (value == @field(UnionTagType, u_field.name)) { - return try stringify(@field(value, u_field.name), options, out_stream); - } - } - } else { - @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'"); - } - }, - .Struct => |S| { - if (comptime std.meta.trait.hasFn("jsonStringify")(T)) { - return value.jsonStringify(options, out_stream); - } - - try out_stream.writeByte(if (S.is_tuple) '[' else '{'); - var field_output = false; - var child_options = options; - if (child_options.whitespace) |*child_whitespace| { - child_whitespace.indent_level += 1; - } - inline for (S.fields) |Field| { - // don't include void fields - if (Field.type == void) continue; - - var emit_field = true; - - // don't include optional fields that are null when emit_null_optional_fields is set to false - if (@typeInfo(Field.type) == .Optional) { - if (options.emit_null_optional_fields == false) { - if (@field(value, Field.name) == null) { - emit_field = false; - } - } - } - - if (emit_field) { - if (!field_output) { - field_output = true; - } else { - try out_stream.writeByte(','); - } - if (child_options.whitespace) |child_whitespace| { - try child_whitespace.outputIndent(out_stream); - } - if (!S.is_tuple) { - try encodeJsonString(Field.name, options, out_stream); - try out_stream.writeByte(':'); - if (child_options.whitespace) |child_whitespace| { - if (child_whitespace.separator) { - try out_stream.writeByte(' '); - } - } - } - try stringify(@field(value, Field.name), child_options, out_stream); - } - } - if (field_output) { - if (options.whitespace) |whitespace| { - try whitespace.outputIndent(out_stream); - } - } - try out_stream.writeByte(if (S.is_tuple) ']' else '}'); - return; - }, - .ErrorSet => return stringify(@as([]const u8, @errorName(value)), options, out_stream), - .Pointer => |ptr_info| switch (ptr_info.size) { - .One => switch (@typeInfo(ptr_info.child)) { - .Array => { - const Slice = []const std.meta.Elem(ptr_info.child); - return stringify(@as(Slice, value), options, out_stream); - }, - else => { - // TODO: avoid loops? - return stringify(value.*, options, out_stream); - }, - }, - .Many, .Slice => { - if (ptr_info.size == .Many and ptr_info.sentinel == null) - @compileError("unable to stringify type '" ++ @typeName(T) ++ "' without sentinel"); - const slice = if (ptr_info.size == .Many) mem.span(value) else value; - - if (ptr_info.child == u8 and options.string == .String and std.unicode.utf8ValidateSlice(slice)) { - try encodeJsonString(slice, options, out_stream); - return; - } - - try out_stream.writeByte('['); - var child_options = options; - if (child_options.whitespace) |*whitespace| { - whitespace.indent_level += 1; - } - for (slice, 0..) |x, i| { - if (i != 0) { - try out_stream.writeByte(','); - } - if (child_options.whitespace) |child_whitespace| { - try child_whitespace.outputIndent(out_stream); - } - try stringify(x, child_options, out_stream); - } - if (slice.len != 0) { - if (options.whitespace) |whitespace| { - try whitespace.outputIndent(out_stream); - } - } - try out_stream.writeByte(']'); - return; - }, - else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"), - }, - .Array => return stringify(&value, options, out_stream), - .Vector => |info| { - const array: [info.len]info.child = value; - return stringify(&array, options, out_stream); - }, - else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"), - } - unreachable; -} - -// Same as `stringify` but accepts an Allocator and stores result in dynamically allocated memory instead of using a Writer. -// Caller owns returned memory. -pub fn stringifyAlloc(allocator: std.mem.Allocator, value: anytype, options: StringifyOptions) ![]const u8 { - var list = std.ArrayList(u8).init(allocator); - errdefer list.deinit(); - try stringify(value, options, list.writer()); - return list.toOwnedSlice(); -} +// Deprecations +pub const parse = @compileError("Deprecated; use parseFromSlice() or parseFromTokenSource() instead."); +pub const StreamingParser = @compileError("Deprecated; use json.Scanner or json.Reader instead."); +pub const TokenStream = @compileError("Deprecated; use json.Scanner or json.Reader instead."); test { _ = @import("json/test.zig"); + _ = @import("json/scanner.zig"); _ = @import("json/write_stream.zig"); -} - -test "stringify null optional fields" { - const MyStruct = struct { - optional: ?[]const u8 = null, - required: []const u8 = "something", - another_optional: ?[]const u8 = null, - another_required: []const u8 = "something else", - }; - try teststringify( - \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"} - , - MyStruct{}, - StringifyOptions{}, - ); - try teststringify( - \\{"required":"something","another_required":"something else"} - , - MyStruct{}, - StringifyOptions{ .emit_null_optional_fields = false }, - ); - - var ts = TokenStream.init( - \\{"required":"something","another_required":"something else"} - ); - try std.testing.expect(try parsesTo(MyStruct, MyStruct{}, &ts, .{ - .allocator = std.testing.allocator, - })); -} - -test "skipValue" { - var ts = TokenStream.init("false"); - try skipValue(&ts); - ts = TokenStream.init("true"); - try skipValue(&ts); - ts = TokenStream.init("null"); - try skipValue(&ts); - ts = TokenStream.init("42"); - try skipValue(&ts); - ts = TokenStream.init("42.0"); - try skipValue(&ts); - ts = TokenStream.init("\"foo\""); - try skipValue(&ts); - ts = TokenStream.init("[101, 111, 121]"); - try skipValue(&ts); - ts = TokenStream.init("{}"); - try skipValue(&ts); - ts = TokenStream.init("{\"foo\": \"bar\"}"); - try skipValue(&ts); - - { // An absurd number of nestings - const nestings = StreamingParser.default_max_nestings + 1; - - ts = TokenStream.init("[" ** nestings ++ "]" ** nestings); - try testing.expectError(error.TooManyNestedItems, skipValue(&ts)); - } - - { // Would a number token cause problems in a deeply-nested array? - const nestings = StreamingParser.default_max_nestings; - const deeply_nested_array = "[" ** nestings ++ "0.118, 999, 881.99, 911.9, 725, 3" ++ "]" ** nestings; - - ts = TokenStream.init(deeply_nested_array); - try skipValue(&ts); - - ts = TokenStream.init("[" ++ deeply_nested_array ++ "]"); - try testing.expectError(error.TooManyNestedItems, skipValue(&ts)); - } - - // Mismatched brace/square bracket - ts = TokenStream.init("[102, 111, 111}"); - try testing.expectError(error.UnexpectedClosingBrace, skipValue(&ts)); - - { // should fail if no value found (e.g. immediate close of object) - var empty_object = TokenStream.init("{}"); - assert(.ObjectBegin == (try empty_object.next()).?); - try testing.expectError(error.UnexpectedJsonDepth, skipValue(&empty_object)); - - var empty_array = TokenStream.init("[]"); - assert(.ArrayBegin == (try empty_array.next()).?); - try testing.expectError(error.UnexpectedJsonDepth, skipValue(&empty_array)); - } -} - -test "stringify basic types" { - try teststringify("false", false, StringifyOptions{}); - try teststringify("true", true, StringifyOptions{}); - try teststringify("null", @as(?u8, null), StringifyOptions{}); - try teststringify("null", @as(?*u32, null), StringifyOptions{}); - try teststringify("42", 42, StringifyOptions{}); - try teststringify("4.2e+01", 42.0, StringifyOptions{}); - try teststringify("42", @as(u8, 42), StringifyOptions{}); - try teststringify("42", @as(u128, 42), StringifyOptions{}); - try teststringify("4.2e+01", @as(f32, 42), StringifyOptions{}); - try teststringify("4.2e+01", @as(f64, 42), StringifyOptions{}); - try teststringify("\"ItBroke\"", @as(anyerror, error.ItBroke), StringifyOptions{}); -} - -test "stringify string" { - try teststringify("\"hello\"", "hello", StringifyOptions{}); - try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{}); - try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{}); - try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{80}\"", "with unicode\u{80}", StringifyOptions{}); - try teststringify("\"with unicode\\u0080\"", "with unicode\u{80}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", StringifyOptions{}); - try teststringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{100}\"", "with unicode\u{100}", StringifyOptions{}); - try teststringify("\"with unicode\\u0100\"", "with unicode\u{100}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{800}\"", "with unicode\u{800}", StringifyOptions{}); - try teststringify("\"with unicode\\u0800\"", "with unicode\u{800}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", StringifyOptions{}); - try teststringify("\"with unicode\\u8000\"", "with unicode\u{8000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", StringifyOptions{}); - try teststringify("\"with unicode\\ud799\"", "with unicode\u{D799}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", StringifyOptions{}); - try teststringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", StringifyOptions{}); - try teststringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"/\"", "/", StringifyOptions{}); - try teststringify("\"\\/\"", "/", StringifyOptions{ .string = .{ .String = .{ .escape_solidus = true } } }); -} - -test "stringify many-item sentinel-terminated string" { - try teststringify("\"hello\"", @as([*:0]const u8, "hello"), StringifyOptions{}); - try teststringify("\"with\\nescapes\\r\"", @as([*:0]const u8, "with\nescapes\r"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); - try teststringify("\"with unicode\\u0001\"", @as([*:0]const u8, "with unicode\u{1}"), StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } }); -} - -test "stringify tagged unions" { - try teststringify("42", union(enum) { - Foo: u32, - Bar: bool, - }{ .Foo = 42 }, StringifyOptions{}); -} - -test "stringify struct" { - try teststringify("{\"foo\":42}", struct { - foo: u32, - }{ .foo = 42 }, StringifyOptions{}); -} - -test "stringify struct with string as array" { - try teststringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, StringifyOptions{}); - try teststringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, StringifyOptions{ .string = .Array }); -} - -test "stringify struct with indentation" { - try teststringify( - \\{ - \\ "foo": 42, - \\ "bar": [ - \\ 1, - \\ 2, - \\ 3 - \\ ] - \\} - , - struct { - foo: u32, - bar: [3]u32, - }{ - .foo = 42, - .bar = .{ 1, 2, 3 }, - }, - StringifyOptions{ - .whitespace = .{}, - }, - ); - try teststringify( - "{\n\t\"foo\":42,\n\t\"bar\":[\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}", - struct { - foo: u32, - bar: [3]u32, - }{ - .foo = 42, - .bar = .{ 1, 2, 3 }, - }, - StringifyOptions{ - .whitespace = .{ - .indent = .Tab, - .separator = false, - }, - }, - ); - try teststringify( - \\{"foo":42,"bar":[1,2,3]} - , - struct { - foo: u32, - bar: [3]u32, - }{ - .foo = 42, - .bar = .{ 1, 2, 3 }, - }, - StringifyOptions{ - .whitespace = .{ - .indent = .None, - .separator = false, - }, - }, - ); -} - -test "stringify struct with void field" { - try teststringify("{\"foo\":42}", struct { - foo: u32, - bar: void = {}, - }{ .foo = 42 }, StringifyOptions{}); -} - -test "stringify array of structs" { - const MyStruct = struct { - foo: u32, - }; - try teststringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", [_]MyStruct{ - MyStruct{ .foo = 42 }, - MyStruct{ .foo = 100 }, - MyStruct{ .foo = 1000 }, - }, StringifyOptions{}); -} - -test "stringify struct with custom stringifier" { - try teststringify("[\"something special\",42]", struct { - foo: u32, - const Self = @This(); - pub fn jsonStringify( - value: Self, - options: StringifyOptions, - out_stream: anytype, - ) !void { - _ = value; - try out_stream.writeAll("[\"something special\","); - try stringify(42, options, out_stream); - try out_stream.writeByte(']'); - } - }{ .foo = 42 }, StringifyOptions{}); -} - -test "stringify vector" { - try teststringify("[1,1]", @splat(2, @as(u32, 1)), StringifyOptions{}); -} - -test "stringify tuple" { - try teststringify("[\"foo\",42]", std.meta.Tuple(&.{ []const u8, usize }){ "foo", 42 }, StringifyOptions{}); -} - -fn teststringify(expected: []const u8, value: anytype, options: StringifyOptions) !void { - const ValidationWriter = struct { - const Self = @This(); - pub const Writer = std.io.Writer(*Self, Error, write); - pub const Error = error{ - TooMuchData, - DifferentData, - }; - - expected_remaining: []const u8, - - fn init(exp: []const u8) Self { - return .{ .expected_remaining = exp }; - } - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - - fn write(self: *Self, bytes: []const u8) Error!usize { - if (self.expected_remaining.len < bytes.len) { - std.debug.print( - \\====== expected this output: ========= - \\{s} - \\======== instead found this: ========= - \\{s} - \\====================================== - , .{ - self.expected_remaining, - bytes, - }); - return error.TooMuchData; - } - if (!mem.eql(u8, self.expected_remaining[0..bytes.len], bytes)) { - std.debug.print( - \\====== expected this output: ========= - \\{s} - \\======== instead found this: ========= - \\{s} - \\====================================== - , .{ - self.expected_remaining[0..bytes.len], - bytes, - }); - return error.DifferentData; - } - self.expected_remaining = self.expected_remaining[bytes.len..]; - return bytes.len; - } - }; - - var vos = ValidationWriter.init(expected); - try stringify(value, options, vos.writer()); - if (vos.expected_remaining.len > 0) return error.NotEnoughData; -} - -test "encodesTo" { - // same - try testing.expectEqual(true, encodesTo("false", "false")); - // totally different - try testing.expectEqual(false, encodesTo("false", "true")); - // different lengths - try testing.expectEqual(false, encodesTo("false", "other")); - // with escape - try testing.expectEqual(true, encodesTo("\\", "\\\\")); - try testing.expectEqual(true, encodesTo("with\nescape", "with\\nescape")); - // with unicode - try testing.expectEqual(true, encodesTo("ą", "\\u0105")); - try testing.expectEqual(true, encodesTo("😂", "\\ud83d\\ude02")); - try testing.expectEqual(true, encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02")); -} - -test "deserializing string with escape sequence into sentinel slice" { - const json = "\"\\n\""; - var token_stream = std.json.TokenStream.init(json); - const options = ParseOptions{ .allocator = std.testing.allocator }; - - // Pre-fix, this line would panic: - const result = try std.json.parse([:0]const u8, &token_stream, options); - defer std.json.parseFree([:0]const u8, result, options); - - // Double-check that we're getting the right result - try testing.expect(mem.eql(u8, result, "\n")); -} - -test "stringify struct with custom stringify that returns a custom error" { - var ret = std.json.stringify(struct { - field: Field = .{}, - - pub const Field = struct { - field: ?[]*Field = null, - - const Self = @This(); - pub fn jsonStringify(_: Self, _: StringifyOptions, _: anytype) error{CustomError}!void { - return error.CustomError; - } - }; - }{}, StringifyOptions{}, std.io.null_writer); - - try std.testing.expectError(error.CustomError, ret); + _ = @import("json/dynamic.zig"); + _ = @import("json/static.zig"); + _ = @import("json/stringify.zig"); + _ = @import("json/JSONTestSuite_test.zig"); } |
