aboutsummaryrefslogtreecommitdiff
path: root/std/json.zig
diff options
context:
space:
mode:
authortgschultz <tgschultz@gmail.com>2018-05-11 21:36:02 -0500
committerGitHub <noreply@github.com>2018-05-11 21:36:02 -0500
commit8c1872543c8cf76215cc4bf3ced4637bb1065a4e (patch)
tree72dfebb643ab61579e3fb8dd58cd4610ffe876fa /std/json.zig
parent7186e92c86982950d0aa7c0c2deef9ef96bc1264 (diff)
parent6e821078f625a03eb8b7794c983da0f7793366ab (diff)
downloadzig-8c1872543c8cf76215cc4bf3ced4637bb1065a4e.tar.gz
zig-8c1872543c8cf76215cc4bf3ced4637bb1065a4e.zip
Merge pull request #1 from zig-lang/master
Sync with zig-lang/zig master
Diffstat (limited to 'std/json.zig')
-rw-r--r--std/json.zig1304
1 files changed, 1304 insertions, 0 deletions
diff --git a/std/json.zig b/std/json.zig
new file mode 100644
index 0000000000..6f853501ed
--- /dev/null
+++ b/std/json.zig
@@ -0,0 +1,1304 @@
+// JSON parser conforming to RFC8259.
+//
+// https://tools.ietf.org/html/rfc8259
+
+const std = @import("index.zig");
+const mem = std.mem;
+
+const u1 = @IntType(false, 1);
+const u256 = @IntType(false, 256);
+
+// A single token slice into the parent string.
+//
+// Use `token.slice()` on the inptu at the current position to get the current slice.
+pub const Token = struct {
+ id: Id,
+ // How many bytes do we skip before counting
+ offset: u1,
+ // Whether string contains a \uXXXX sequence and cannot be zero-copied
+ string_has_escape: bool,
+ // Whether number is simple and can be represented by an integer (i.e. no `.` or `e`)
+ number_is_integer: bool,
+ // How many bytes from the current position behind the start of this token is.
+ count: usize,
+
+ pub const Id = enum {
+ ObjectBegin,
+ ObjectEnd,
+ ArrayBegin,
+ ArrayEnd,
+ String,
+ Number,
+ True,
+ False,
+ Null,
+ };
+
+ pub fn init(id: Id, count: usize, offset: u1) Token {
+ return Token {
+ .id = id,
+ .offset = offset,
+ .string_has_escape = false,
+ .number_is_integer = true,
+ .count = count,
+ };
+ }
+
+ pub fn initString(count: usize, has_unicode_escape: bool) Token {
+ return Token {
+ .id = Id.String,
+ .offset = 0,
+ .string_has_escape = has_unicode_escape,
+ .number_is_integer = true,
+ .count = count,
+ };
+ }
+
+ pub fn initNumber(count: usize, number_is_integer: bool) Token {
+ return Token {
+ .id = Id.Number,
+ .offset = 0,
+ .string_has_escape = false,
+ .number_is_integer = number_is_integer,
+ .count = count,
+ };
+ }
+
+ // A marker token is a zero-length
+ pub fn initMarker(id: Id) Token {
+ return Token {
+ .id = id,
+ .offset = 0,
+ .string_has_escape = false,
+ .number_is_integer = true,
+ .count = 0,
+ };
+ }
+
+ // Slice into the underlying input string.
+ pub fn slice(self: &const Token, input: []const u8, i: usize) []const u8 {
+ return input[i + self.offset - self.count .. i + self.offset];
+ }
+};
+
+// A small streaming JSON parser. This accepts input one byte at a time and returns tokens as
+// they are encountered. No copies or allocations are performed during parsing and the entire
+// parsing state requires ~40-50 bytes of stack space.
+//
+// Conforms strictly to RFC8529.
+const StreamingJsonParser = struct {
+ // Current state
+ state: State,
+ // How many bytes we have counted for the current token
+ count: usize,
+ // What state to follow after parsing a string (either property or value string)
+ after_string_state: State,
+ // What state to follow after parsing a value (either top-level or value end)
+ after_value_state: State,
+ // If we stopped now, would the complete parsed string to now be a valid json string
+ complete: bool,
+ // Current token flags to pass through to the next generated, see Token.
+ string_has_escape: bool,
+ number_is_integer: bool,
+
+ // Bit-stack for nested object/map literals (max 255 nestings).
+ stack: u256,
+ stack_used: u8,
+
+ const object_bit = 0;
+ const array_bit = 1;
+ const max_stack_size = @maxValue(u8);
+
+ pub fn init() StreamingJsonParser {
+ var p: StreamingJsonParser = undefined;
+ p.reset();
+ return p;
+ }
+
+ pub fn reset(p: &StreamingJsonParser) void {
+ p.state = State.TopLevelBegin;
+ p.count = 0;
+ // Set before ever read in main transition function
+ p.after_string_state = undefined;
+ p.after_value_state = State.ValueEnd; // handle end of values normally
+ p.stack = 0;
+ p.stack_used = 0;
+ p.complete = false;
+ p.string_has_escape = false;
+ p.number_is_integer = true;
+ }
+
+ pub const State = enum {
+ // These must be first with these explicit values as we rely on them for indexing the
+ // bit-stack directly and avoiding a branch.
+ ObjectSeparator = 0,
+ ValueEnd = 1,
+
+ TopLevelBegin,
+ TopLevelEnd,
+
+ ValueBegin,
+ ValueBeginNoClosing,
+
+ String,
+ StringUtf8Byte3,
+ StringUtf8Byte2,
+ StringUtf8Byte1,
+ StringEscapeCharacter,
+ StringEscapeHexUnicode4,
+ StringEscapeHexUnicode3,
+ StringEscapeHexUnicode2,
+ StringEscapeHexUnicode1,
+
+ Number,
+ NumberMaybeDotOrExponent,
+ NumberMaybeDigitOrDotOrExponent,
+ NumberFractionalRequired,
+ NumberFractional,
+ NumberMaybeExponent,
+ NumberExponent,
+ NumberExponentDigitsRequired,
+ NumberExponentDigits,
+
+ TrueLiteral1,
+ TrueLiteral2,
+ TrueLiteral3,
+
+ FalseLiteral1,
+ FalseLiteral2,
+ FalseLiteral3,
+ FalseLiteral4,
+
+ NullLiteral1,
+ NullLiteral2,
+ NullLiteral3,
+
+ // Only call this function to generate array/object final state.
+ pub fn fromInt(x: var) State {
+ std.debug.assert(x == 0 or x == 1);
+ const T = @TagType(State);
+ return State(T(x));
+ }
+ };
+
+ pub const Error = error {
+ InvalidTopLevel,
+ TooManyNestedItems,
+ TooManyClosingItems,
+ InvalidValueBegin,
+ InvalidValueEnd,
+ UnbalancedBrackets,
+ UnbalancedBraces,
+ UnexpectedClosingBracket,
+ UnexpectedClosingBrace,
+ InvalidNumber,
+ InvalidSeparator,
+ InvalidLiteral,
+ InvalidEscapeCharacter,
+ InvalidUnicodeHexSymbol,
+ InvalidUtf8Byte,
+ InvalidTopLevelTrailing,
+ InvalidControlCharacter,
+ };
+
+ // Give another byte to the parser and obtain any new tokens. This may (rarely) return two
+ // tokens. token2 is always null if token1 is null.
+ //
+ // There is currently no error recovery on a bad stream.
+ pub fn feed(p: &StreamingJsonParser, c: u8, token1: &?Token, token2: &?Token) Error!void {
+ *token1 = null;
+ *token2 = null;
+ p.count += 1;
+
+ // unlikely
+ if (try p.transition(c, token1)) {
+ _ = try p.transition(c, token2);
+ }
+ }
+
+ // Perform a single transition on the state machine and return any possible token.
+ fn transition(p: &StreamingJsonParser, c: u8, token: &?Token) Error!bool {
+ switch (p.state) {
+ State.TopLevelBegin => switch (c) {
+ '{' => {
+ p.stack <<= 1;
+ p.stack |= object_bit;
+ p.stack_used += 1;
+
+ p.state = State.ValueBegin;
+ p.after_string_state = State.ObjectSeparator;
+
+ *token = Token.initMarker(Token.Id.ObjectBegin);
+ },
+ '[' => {
+ p.stack <<= 1;
+ p.stack |= array_bit;
+ p.stack_used += 1;
+
+ p.state = State.ValueBegin;
+ p.after_string_state = State.ValueEnd;
+
+ *token = Token.initMarker(Token.Id.ArrayBegin);
+ },
+ '-' => {
+ p.number_is_integer = true;
+ p.state = State.Number;
+ p.after_value_state = State.TopLevelEnd;
+ p.count = 0;
+ },
+ '0' => {
+ p.number_is_integer = true;
+ p.state = State.NumberMaybeDotOrExponent;
+ p.after_value_state = State.TopLevelEnd;
+ p.count = 0;
+ },
+ '1' ... '9' => {
+ p.number_is_integer = true;
+ p.state = State.NumberMaybeDigitOrDotOrExponent;
+ p.after_value_state = State.TopLevelEnd;
+ p.count = 0;
+ },
+ '"' => {
+ p.state = State.String;
+ p.after_value_state = State.TopLevelEnd;
+ // We don't actually need the following since after_value_state should override.
+ p.after_string_state = State.ValueEnd;
+ p.string_has_escape = false;
+ p.count = 0;
+ },
+ 't' => {
+ p.state = State.TrueLiteral1;
+ p.after_value_state = State.TopLevelEnd;
+ p.count = 0;
+ },
+ 'f' => {
+ p.state = State.FalseLiteral1;
+ p.after_value_state = State.TopLevelEnd;
+ p.count = 0;
+ },
+ 'n' => {
+ p.state = State.NullLiteral1;
+ p.after_value_state = State.TopLevelEnd;
+ p.count = 0;
+ },
+ 0x09, 0x0A, 0x0D, 0x20 => {
+ // whitespace
+ },
+ else => {
+ return error.InvalidTopLevel;
+ },
+ },
+
+ State.TopLevelEnd => switch (c) {
+ 0x09, 0x0A, 0x0D, 0x20 => {
+ // whitespace
+ },
+ else => {
+ return error.InvalidTopLevelTrailing;
+ },
+ },
+
+ State.ValueBegin => switch (c) {
+ // NOTE: These are shared in ValueEnd as well, think we can reorder states to
+ // be a bit clearer and avoid this duplication.
+ '}' => {
+ // unlikely
+ if (p.stack & 1 != object_bit) {
+ return error.UnexpectedClosingBracket;
+ }
+ if (p.stack_used == 0) {
+ return error.TooManyClosingItems;
+ }
+
+ p.state = State.ValueBegin;
+ p.after_string_state = State.fromInt(p.stack & 1);
+
+ p.stack >>= 1;
+ p.stack_used -= 1;
+
+ switch (p.stack_used) {
+ 0 => {
+ p.complete = true;
+ p.state = State.TopLevelEnd;
+ },
+ else => {},
+ }
+
+ *token = Token.initMarker(Token.Id.ObjectEnd);
+ },
+ ']' => {
+ if (p.stack & 1 != array_bit) {
+ return error.UnexpectedClosingBrace;
+ }
+ if (p.stack_used == 0) {
+ return error.TooManyClosingItems;
+ }
+
+ p.state = State.ValueBegin;
+ p.after_string_state = State.fromInt(p.stack & 1);
+
+ p.stack >>= 1;
+ p.stack_used -= 1;
+
+ switch (p.stack_used) {
+ 0 => {
+ p.complete = true;
+ p.state = State.TopLevelEnd;
+ },
+ else => {},
+ }
+
+ *token = Token.initMarker(Token.Id.ArrayEnd);
+ },
+ '{' => {
+ if (p.stack_used == max_stack_size) {
+ return error.TooManyNestedItems;
+ }
+
+ p.stack <<= 1;
+ p.stack |= object_bit;
+ p.stack_used += 1;
+
+ p.state = State.ValueBegin;
+ p.after_string_state = State.ObjectSeparator;
+
+ *token = Token.initMarker(Token.Id.ObjectBegin);
+ },
+ '[' => {
+ if (p.stack_used == max_stack_size) {
+ return error.TooManyNestedItems;
+ }
+
+ p.stack <<= 1;
+ p.stack |= array_bit;
+ p.stack_used += 1;
+
+ p.state = State.ValueBegin;
+ p.after_string_state = State.ValueEnd;
+
+ *token = Token.initMarker(Token.Id.ArrayBegin);
+ },
+ '-' => {
+ p.state = State.Number;
+ p.count = 0;
+ },
+ '0' => {
+ p.state = State.NumberMaybeDotOrExponent;
+ p.count = 0;
+ },
+ '1' ... '9' => {
+ p.state = State.NumberMaybeDigitOrDotOrExponent;
+ p.count = 0;
+ },
+ '"' => {
+ p.state = State.String;
+ p.count = 0;
+ },
+ 't' => {
+ p.state = State.TrueLiteral1;
+ p.count = 0;
+ },
+ 'f' => {
+ p.state = State.FalseLiteral1;
+ p.count = 0;
+ },
+ 'n' => {
+ p.state = State.NullLiteral1;
+ p.count = 0;
+ },
+ 0x09, 0x0A, 0x0D, 0x20 => {
+ // whitespace
+ },
+ else => {
+ return error.InvalidValueBegin;
+ },
+ },
+
+ // TODO: A bit of duplication here and in the following state, redo.
+ State.ValueBeginNoClosing => switch (c) {
+ '{' => {
+ if (p.stack_used == max_stack_size) {
+ return error.TooManyNestedItems;
+ }
+
+ p.stack <<= 1;
+ p.stack |= object_bit;
+ p.stack_used += 1;
+
+ p.state = State.ValueBegin;
+ p.after_string_state = State.ObjectSeparator;
+
+ *token = Token.initMarker(Token.Id.ObjectBegin);
+ },
+ '[' => {
+ if (p.stack_used == max_stack_size) {
+ return error.TooManyNestedItems;
+ }
+
+ p.stack <<= 1;
+ p.stack |= array_bit;
+ p.stack_used += 1;
+
+ p.state = State.ValueBegin;
+ p.after_string_state = State.ValueEnd;
+
+ *token = Token.initMarker(Token.Id.ArrayBegin);
+ },
+ '-' => {
+ p.state = State.Number;
+ p.count = 0;
+ },
+ '0' => {
+ p.state = State.NumberMaybeDotOrExponent;
+ p.count = 0;
+ },
+ '1' ... '9' => {
+ p.state = State.NumberMaybeDigitOrDotOrExponent;
+ p.count = 0;
+ },
+ '"' => {
+ p.state = State.String;
+ p.count = 0;
+ },
+ 't' => {
+ p.state = State.TrueLiteral1;
+ p.count = 0;
+ },
+ 'f' => {
+ p.state = State.FalseLiteral1;
+ p.count = 0;
+ },
+ 'n' => {
+ p.state = State.NullLiteral1;
+ p.count = 0;
+ },
+ 0x09, 0x0A, 0x0D, 0x20 => {
+ // whitespace
+ },
+ else => {
+ return error.InvalidValueBegin;
+ },
+ },
+
+ State.ValueEnd => switch (c) {
+ ',' => {
+ p.after_string_state = State.fromInt(p.stack & 1);
+ p.state = State.ValueBeginNoClosing;
+ },
+ ']' => {
+ if (p.stack_used == 0) {
+ return error.UnbalancedBrackets;
+ }
+
+ p.state = State.ValueEnd;
+ p.after_string_state = State.fromInt(p.stack & 1);
+
+ p.stack >>= 1;
+ p.stack_used -= 1;
+
+ if (p.stack_used == 0) {
+ p.complete = true;
+ p.state = State.TopLevelEnd;
+ }
+
+ *token = Token.initMarker(Token.Id.ArrayEnd);
+ },
+ '}' => {
+ if (p.stack_used == 0) {
+ return error.UnbalancedBraces;
+ }
+
+ p.state = State.ValueEnd;
+ p.after_string_state = State.fromInt(p.stack & 1);
+
+ p.stack >>= 1;
+ p.stack_used -= 1;
+
+ if (p.stack_used == 0) {
+ p.complete = true;
+ p.state = State.TopLevelEnd;
+ }
+
+ *token = Token.initMarker(Token.Id.ObjectEnd);
+ },
+ 0x09, 0x0A, 0x0D, 0x20 => {
+ // whitespace
+ },
+ else => {
+ return error.InvalidValueEnd;
+ },
+ },
+
+ State.ObjectSeparator => switch (c) {
+ ':' => {
+ p.state = State.ValueBegin;
+ p.after_string_state = State.ValueEnd;
+ },
+ 0x09, 0x0A, 0x0D, 0x20 => {
+ // whitespace
+ },
+ else => {
+ return error.InvalidSeparator;
+ },
+ },
+
+ State.String => switch (c) {
+ 0x00 ... 0x1F => {
+ return error.InvalidControlCharacter;
+ },
+ '"' => {
+ p.state = p.after_string_state;
+ if (p.after_value_state == State.TopLevelEnd) {
+ p.state = State.TopLevelEnd;
+ p.complete = true;
+ }
+
+ *token = Token.initString(p.count - 1, p.string_has_escape);
+ },
+ '\\' => {
+ p.state = State.StringEscapeCharacter;
+ },
+ 0x20, 0x21, 0x23 ... 0x5B, 0x5D ... 0x7F => {
+ // non-control ascii
+ },
+ 0xC0 ... 0xDF => {
+ p.state = State.StringUtf8Byte1;
+ },
+ 0xE0 ... 0xEF => {
+ p.state = State.StringUtf8Byte2;
+ },
+ 0xF0 ... 0xFF => {
+ p.state = State.StringUtf8Byte3;
+ },
+ else => {
+ return error.InvalidUtf8Byte;
+ },
+ },
+
+ State.StringUtf8Byte3 => switch (c >> 6) {
+ 0b10 => p.state = State.StringUtf8Byte2,
+ else => return error.InvalidUtf8Byte,
+ },
+
+ State.StringUtf8Byte2 => switch (c >> 6) {
+ 0b10 => p.state = State.StringUtf8Byte1,
+ else => return error.InvalidUtf8Byte,
+ },
+
+ State.StringUtf8Byte1 => switch (c >> 6) {
+ 0b10 => p.state = State.String,
+ else => return error.InvalidUtf8Byte,
+ },
+
+ State.StringEscapeCharacter => switch (c) {
+ // NOTE: '/' is allowed as an escaped character but it also is allowed
+ // as unescaped according to the RFC. There is a reported errata which suggests
+ // removing the non-escaped variant but it makes more sense to simply disallow
+ // it as an escape code here.
+ //
+ // The current JSONTestSuite tests rely on both of this behaviour being present
+ // however, so we default to the status quo where both are accepted until this
+ // is further clarified.
+ '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => {
+ p.string_has_escape = true;
+ p.state = State.String;
+ },
+ 'u' => {
+ p.string_has_escape = true;
+ p.state = State.StringEscapeHexUnicode4;
+ },
+ else => {
+ return error.InvalidEscapeCharacter;
+ },
+ },
+
+ State.StringEscapeHexUnicode4 => switch (c) {
+ '0' ... '9', 'A' ... 'F', 'a' ... 'f' => {
+ p.state = State.StringEscapeHexUnicode3;
+ },
+ else => return error.InvalidUnicodeHexSymbol,
+ },
+
+ State.StringEscapeHexUnicode3 => switch (c) {
+ '0' ... '9', 'A' ... 'F', 'a' ... 'f' => {
+ p.state = State.StringEscapeHexUnicode2;
+ },
+ else => return error.InvalidUnicodeHexSymbol,
+ },
+
+ State.StringEscapeHexUnicode2 => switch (c) {
+ '0' ... '9', 'A' ... 'F', 'a' ... 'f' => {
+ p.state = State.StringEscapeHexUnicode1;
+ },
+ else => return error.InvalidUnicodeHexSymbol,
+ },
+
+ State.StringEscapeHexUnicode1 => switch (c) {
+ '0' ... '9', 'A' ... 'F', 'a' ... 'f' => {
+ p.state = State.String;
+ },
+ else => return error.InvalidUnicodeHexSymbol,
+ },
+
+ State.Number => {
+ p.complete = p.after_value_state == State.TopLevelEnd;
+ switch (c) {
+ '0' => {
+ p.state = State.NumberMaybeDotOrExponent;
+ },
+ '1' ... '9' => {
+ p.state = State.NumberMaybeDigitOrDotOrExponent;
+ },
+ else => {
+ return error.InvalidNumber;
+ },
+ }
+ },
+
+ State.NumberMaybeDotOrExponent => {
+ p.complete = p.after_value_state == State.TopLevelEnd;
+ switch (c) {
+ '.' => {
+ p.number_is_integer = false;
+ p.state = State.NumberFractionalRequired;
+ },
+ 'e', 'E' => {
+ p.number_is_integer = false;
+ p.state = State.NumberExponent;
+ },
+ else => {
+ p.state = p.after_value_state;
+ *token = Token.initNumber(p.count, p.number_is_integer);
+ return true;
+ },
+ }
+ },
+
+ State.NumberMaybeDigitOrDotOrExponent => {
+ p.complete = p.after_value_state == State.TopLevelEnd;
+ switch (c) {
+ '.' => {
+ p.number_is_integer = false;
+ p.state = State.NumberFractionalRequired;
+ },
+ 'e', 'E' => {
+ p.number_is_integer = false;
+ p.state = State.NumberExponent;
+ },
+ '0' ... '9' => {
+ // another digit
+ },
+ else => {
+ p.state = p.after_value_state;
+ *token = Token.initNumber(p.count, p.number_is_integer);
+ return true;
+ },
+ }
+ },
+
+ State.NumberFractionalRequired => {
+ p.complete = p.after_value_state == State.TopLevelEnd;
+ switch (c) {
+ '0' ... '9' => {
+ p.state = State.NumberFractional;
+ },
+ else => {
+ return error.InvalidNumber;
+ },
+ }
+ },
+
+ State.NumberFractional => {
+ p.complete = p.after_value_state == State.TopLevelEnd;
+ switch (c) {
+ '0' ... '9' => {
+ // another digit
+ },
+ 'e', 'E' => {
+ p.number_is_integer = false;
+ p.state = State.NumberExponent;
+ },
+ else => {
+ p.state = p.after_value_state;
+ *token = Token.initNumber(p.count, p.number_is_integer);
+ return true;
+ },
+ }
+ },
+
+ State.NumberMaybeExponent => {
+ p.complete = p.after_value_state == State.TopLevelEnd;
+ switch (c) {
+ 'e', 'E' => {
+ p.number_is_integer = false;
+ p.state = State.NumberExponent;
+ },
+ else => {
+ p.state = p.after_value_state;
+ *token = Token.initNumber(p.count, p.number_is_integer);
+ return true;
+ },
+ }
+ },
+
+ State.NumberExponent => switch (c) {
+ '-', '+', => {
+ p.complete = false;
+ p.state = State.NumberExponentDigitsRequired;
+ },
+ '0' ... '9' => {
+ p.complete = p.after_value_state == State.TopLevelEnd;
+ p.state = State.NumberExponentDigits;
+ },
+ else => {
+ return error.InvalidNumber;
+ },
+ },
+
+ State.NumberExponentDigitsRequired => switch (c) {
+ '0' ... '9' => {
+ p.complete = p.after_value_state == State.TopLevelEnd;
+ p.state = State.NumberExponentDigits;
+ },
+ else => {
+ return error.InvalidNumber;
+ },
+ },
+
+ State.NumberExponentDigits => {
+ p.complete = p.after_value_state == State.TopLevelEnd;
+ switch (c) {
+ '0' ... '9' => {
+ // another digit
+ },
+ else => {
+ p.state = p.after_value_state;
+ *token = Token.initNumber(p.count, p.number_is_integer);
+ return true;
+ },
+ }
+ },
+
+ State.TrueLiteral1 => switch (c) {
+ 'r' => p.state = State.TrueLiteral2,
+ else => return error.InvalidLiteral,
+ },
+
+ State.TrueLiteral2 => switch (c) {
+ 'u' => p.state = State.TrueLiteral3,
+ else => return error.InvalidLiteral,
+ },
+
+ State.TrueLiteral3 => switch (c) {
+ 'e' => {
+ p.state = p.after_value_state;
+ p.complete = p.state == State.TopLevelEnd;
+ *token = Token.init(Token.Id.True, p.count + 1, 1);
+ },
+ else => {
+ return error.InvalidLiteral;
+ },
+ },
+
+ State.FalseLiteral1 => switch (c) {
+ 'a' => p.state = State.FalseLiteral2,
+ else => return error.InvalidLiteral,
+ },
+
+ State.FalseLiteral2 => switch (c) {
+ 'l' => p.state = State.FalseLiteral3,
+ else => return error.InvalidLiteral,
+ },
+
+ State.FalseLiteral3 => switch (c) {
+ 's' => p.state = State.FalseLiteral4,
+ else => return error.InvalidLiteral,
+ },
+
+ State.FalseLiteral4 => switch (c) {
+ 'e' => {
+ p.state = p.after_value_state;
+ p.complete = p.state == State.TopLevelEnd;
+ *token = Token.init(Token.Id.False, p.count + 1, 1);
+ },
+ else => {
+ return error.InvalidLiteral;
+ },
+ },
+
+ State.NullLiteral1 => switch (c) {
+ 'u' => p.state = State.NullLiteral2,
+ else => return error.InvalidLiteral,
+ },
+
+ State.NullLiteral2 => switch (c) {
+ 'l' => p.state = State.NullLiteral3,
+ else => return error.InvalidLiteral,
+ },
+
+ State.NullLiteral3 => switch (c) {
+ 'l' => {
+ p.state = p.after_value_state;
+ p.complete = p.state == State.TopLevelEnd;
+ *token = Token.init(Token.Id.Null, p.count + 1, 1);
+ },
+ else => {
+ return error.InvalidLiteral;
+ },
+ },
+ }
+
+ return false;
+ }
+};
+
+// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
+// be able to decode the string even if this returns true.
+pub fn validate(s: []const u8) bool {
+ var p = StreamingJsonParser.init();
+
+ for (s) |c, i| {
+ var token1: ?Token = undefined;
+ var token2: ?Token = undefined;
+
+ p.feed(c, &token1, &token2) catch |err| {
+ return false;
+ };
+ }
+
+ return p.complete;
+}
+
+const Allocator = std.mem.Allocator;
+const ArenaAllocator = std.heap.ArenaAllocator;
+const ArrayList = std.ArrayList;
+const HashMap = std.HashMap;
+
+pub const ValueTree = struct {
+ arena: ArenaAllocator,
+ root: Value,
+
+ pub fn deinit(self: &ValueTree) void {
+ self.arena.deinit();
+ }
+};
+
+pub const ObjectMap = HashMap([]const u8, Value, mem.hash_slice_u8, mem.eql_slice_u8);
+
+pub const Value = union(enum) {
+ Null,
+ Bool: bool,
+ Integer: i64,
+ Float: f64,
+ String: []const u8,
+ Array: ArrayList(Value),
+ Object: ObjectMap,
+
+ pub fn dump(self: &const Value) void {
+ switch (*self) {
+ Value.Null => {
+ std.debug.warn("null");
+ },
+ Value.Bool => |inner| {
+ std.debug.warn("{}", inner);
+ },
+ Value.Integer => |inner| {
+ std.debug.warn("{}", inner);
+ },
+ Value.Float => |inner| {
+ std.debug.warn("{.5}", inner);
+ },
+ Value.String => |inner| {
+ std.debug.warn("\"{}\"", inner);
+ },
+ Value.Array => |inner| {
+ var not_first = false;
+ std.debug.warn("[");
+ for (inner.toSliceConst()) |value| {
+ if (not_first) {
+ std.debug.warn(",");
+ }
+ not_first = true;
+ value.dump();
+ }
+ std.debug.warn("]");
+ },
+ Value.Object => |inner| {
+ var not_first = false;
+ std.debug.warn("{{");
+ var it = inner.iterator();
+
+ while (it.next()) |entry| {
+ if (not_first) {
+ std.debug.warn(",");
+ }
+ not_first = true;
+ std.debug.warn("\"{}\":", entry.key);
+ entry.value.dump();
+ }
+ std.debug.warn("}}");
+ },
+ }
+ }
+
+ pub fn dumpIndent(self: &const Value, indent: usize) void {
+ if (indent == 0) {
+ self.dump();
+ } else {
+ self.dumpIndentLevel(indent, 0);
+ }
+ }
+
+ fn dumpIndentLevel(self: &const Value, indent: usize, level: usize) void {
+ switch (*self) {
+ Value.Null => {
+ std.debug.warn("null");
+ },
+ Value.Bool => |inner| {
+ std.debug.warn("{}", inner);
+ },
+ Value.Integer => |inner| {
+ std.debug.warn("{}", inner);
+ },
+ Value.Float => |inner| {
+ std.debug.warn("{.5}", inner);
+ },
+ Value.String => |inner| {
+ std.debug.warn("\"{}\"", inner);
+ },
+ Value.Array => |inner| {
+ var not_first = false;
+ std.debug.warn("[\n");
+
+ for (inner.toSliceConst()) |value| {
+ if (not_first) {
+ std.debug.warn(",\n");
+ }
+ not_first = true;
+ padSpace(level + indent);
+ value.dumpIndentLevel(indent, level + indent);
+ }
+ std.debug.warn("\n");
+ padSpace(level);
+ std.debug.warn("]");
+ },
+ Value.Object => |inner| {
+ var not_first = false;
+ std.debug.warn("{{\n");
+ var it = inner.iterator();
+
+ while (it.next()) |entry| {
+ if (not_first) {
+ std.debug.warn(",\n");
+ }
+ not_first = true;
+ padSpace(level + indent);
+ std.debug.warn("\"{}\": ", entry.key);
+ entry.value.dumpIndentLevel(indent, level + indent);
+ }
+ std.debug.warn("\n");
+ padSpace(level);
+ std.debug.warn("}}");
+ },
+ }
+ }
+
+ fn padSpace(indent: usize) void {
+ var i: usize = 0;
+ while (i < indent) : (i += 1) {
+ std.debug.warn(" ");
+ }
+ }
+};
+
+// A non-stream JSON parser which constructs a tree of Value's.
+const JsonParser = struct {
+ allocator: &Allocator,
+ state: State,
+ copy_strings: bool,
+ // Stores parent nodes and un-combined Values.
+ stack: ArrayList(Value),
+
+ const State = enum {
+ ObjectKey,
+ ObjectValue,
+ ArrayValue,
+ Simple,
+ };
+
+ pub fn init(allocator: &Allocator, copy_strings: bool) JsonParser {
+ return JsonParser {
+ .allocator = allocator,
+ .state = State.Simple,
+ .copy_strings = copy_strings,
+ .stack = ArrayList(Value).init(allocator),
+ };
+ }
+
+ pub fn deinit(p: &JsonParser) void {
+ p.stack.deinit();
+ }
+
+ pub fn reset(p: &JsonParser) void {
+ p.state = State.Simple;
+ p.stack.shrink(0);
+ }
+
+ pub fn parse(p: &JsonParser, input: []const u8) !ValueTree {
+ var mp = StreamingJsonParser.init();
+
+ var arena = ArenaAllocator.init(p.allocator);
+ errdefer arena.deinit();
+
+ for (input) |c, i| {
+ var mt1: ?Token = undefined;
+ var mt2: ?Token = undefined;
+
+ try mp.feed(c, &mt1, &mt2);
+ if (mt1) |t1| {
+ try p.transition(&arena.allocator, input, i, t1);
+
+ if (mt2) |t2| {
+ try p.transition(&arena.allocator, input, i, t2);
+ }
+ }
+ }
+
+ // Handle top-level lonely number values.
+ {
+ const i = input.len;
+ var mt1: ?Token = undefined;
+ var mt2: ?Token = undefined;
+
+ try mp.feed(' ', &mt1, &mt2);
+ if (mt1) |t1| {
+ try p.transition(&arena.allocator, input, i, t1);
+ }
+ }
+
+ if (!mp.complete) {
+ return error.IncompleteJsonInput;
+ }
+
+ std.debug.assert(p.stack.len == 1);
+
+ return ValueTree {
+ .arena = arena,
+ .root = p.stack.at(0),
+ };
+ }
+
+ // Even though p.allocator exists, we take an explicit allocator so that allocation state
+ // can be cleaned up on error correctly during a `parse` on call.
+ fn transition(p: &JsonParser, allocator: &Allocator, input: []const u8, i: usize, token: &const Token) !void {
+ switch (p.state) {
+ State.ObjectKey => switch (token.id) {
+ Token.Id.ObjectEnd => {
+ if (p.stack.len == 1) {
+ return;
+ }
+
+ var value = p.stack.pop();
+ try p.pushToParent(value);
+ },
+ Token.Id.String => {
+ try p.stack.append(try p.parseString(allocator, token, input, i));
+ p.state = State.ObjectValue;
+ },
+ else => {
+ unreachable;
+ },
+ },
+ State.ObjectValue => {
+ var object = &p.stack.items[p.stack.len - 2].Object;
+ var key = p.stack.items[p.stack.len - 1].String;
+
+ switch (token.id) {
+ Token.Id.ObjectBegin => {
+ try p.stack.append(Value { .Object = ObjectMap.init(allocator) });
+ p.state = State.ObjectKey;
+ },
+ Token.Id.ArrayBegin => {
+ try p.stack.append(Value { .Array = ArrayList(Value).init(allocator) });
+ p.state = State.ArrayValue;
+ },
+ Token.Id.String => {
+ _ = try object.put(key, try p.parseString(allocator, token, input, i));
+ _ = p.stack.pop();
+ p.state = State.ObjectKey;
+ },
+ Token.Id.Number => {
+ _ = try object.put(key, try p.parseNumber(token, input, i));
+ _ = p.stack.pop();
+ p.state = State.ObjectKey;
+ },
+ Token.Id.True => {
+ _ = try object.put(key, Value { .Bool = true });
+ _ = p.stack.pop();
+ p.state = State.ObjectKey;
+ },
+ Token.Id.False => {
+ _ = try object.put(key, Value { .Bool = false });
+ _ = p.stack.pop();
+ p.state = State.ObjectKey;
+ },
+ Token.Id.Null => {
+ _ = try object.put(key, Value.Null);
+ _ = p.stack.pop();
+ p.state = State.ObjectKey;
+ },
+ else => {
+ unreachable;
+ },
+ }
+ },
+ State.ArrayValue => {
+ var array = &p.stack.items[p.stack.len - 1].Array;
+
+ switch (token.id) {
+ Token.Id.ArrayEnd => {
+ if (p.stack.len == 1) {
+ return;
+ }
+
+ var value = p.stack.pop();
+ try p.pushToParent(value);
+ },
+ Token.Id.ObjectBegin => {
+ try p.stack.append(Value { .Object = ObjectMap.init(allocator) });
+ p.state = State.ObjectKey;
+ },
+ Token.Id.ArrayBegin => {
+ try p.stack.append(Value { .Array = ArrayList(Value).init(allocator) });
+ p.state = State.ArrayValue;
+ },
+ Token.Id.String => {
+ try array.append(try p.parseString(allocator, token, input, i));
+ },
+ Token.Id.Number => {
+ try array.append(try p.parseNumber(token, input, i));
+ },
+ Token.Id.True => {
+ try array.append(Value { .Bool = true });
+ },
+ Token.Id.False => {
+ try array.append(Value { .Bool = false });
+ },
+ Token.Id.Null => {
+ try array.append(Value.Null);
+ },
+ else => {
+ unreachable;
+ },
+ }
+ },
+ State.Simple => switch (token.id) {
+ Token.Id.ObjectBegin => {
+ try p.stack.append(Value { .Object = ObjectMap.init(allocator) });
+ p.state = State.ObjectKey;
+ },
+ Token.Id.ArrayBegin => {
+ try p.stack.append(Value { .Array = ArrayList(Value).init(allocator) });
+ p.state = State.ArrayValue;
+ },
+ Token.Id.String => {
+ try p.stack.append(try p.parseString(allocator, token, input, i));
+ },
+ Token.Id.Number => {
+ try p.stack.append(try p.parseNumber(token, input, i));
+ },
+ Token.Id.True => {
+ try p.stack.append(Value { .Bool = true });
+ },
+ Token.Id.False => {
+ try p.stack.append(Value { .Bool = false });
+ },
+ Token.Id.Null => {
+ try p.stack.append(Value.Null);
+ },
+ Token.Id.ObjectEnd, Token.Id.ArrayEnd => {
+ unreachable;
+ },
+ },
+ }
+ }
+
+ fn pushToParent(p: &JsonParser, value: &const Value) !void {
+ switch (p.stack.at(p.stack.len - 1)) {
+ // Object Parent -> [ ..., object, <key>, value ]
+ Value.String => |key| {
+ _ = p.stack.pop();
+
+ var object = &p.stack.items[p.stack.len - 1].Object;
+ _ = try object.put(key, value);
+ p.state = State.ObjectKey;
+ },
+ // Array Parent -> [ ..., <array>, value ]
+ Value.Array => |*array| {
+ try array.append(value);
+ p.state = State.ArrayValue;
+ },
+ else => {
+ unreachable;
+ },
+ }
+ }
+
+ fn parseString(p: &JsonParser, allocator: &Allocator, token: &const Token, input: []const u8, i: usize) !Value {
+ // TODO: We don't strictly have to copy values which do not contain any escape
+ // characters if flagged with the option.
+ const slice = token.slice(input, i);
+ return Value { .String = try mem.dupe(p.allocator, u8, slice) };
+ }
+
+ fn parseNumber(p: &JsonParser, token: &const Token, input: []const u8, i: usize) !Value {
+ return if (token.number_is_integer)
+ Value { .Integer = try std.fmt.parseInt(i64, token.slice(input, i), 10) }
+ else
+ @panic("TODO: fmt.parseFloat not yet implemented")
+ ;
+ }
+};
+
+const debug = std.debug;
+
+test "json parser dynamic" {
+ var p = JsonParser.init(std.debug.global_allocator, false);
+ defer p.deinit();
+
+ const s =
+ \\{
+ \\ "Image": {
+ \\ "Width": 800,
+ \\ "Height": 600,
+ \\ "Title": "View from 15th Floor",
+ \\ "Thumbnail": {
+ \\ "Url": "http://www.example.com/image/481989943",
+ \\ "Height": 125,
+ \\ "Width": 100
+ \\ },
+ \\ "Animated" : false,
+ \\ "IDs": [116, 943, 234, 38793]
+ \\ }
+ \\}
+ ;
+
+ var tree = try p.parse(s);
+ defer tree.deinit();
+
+ var root = tree.root;
+
+ var image = (??root.Object.get("Image")).value;
+
+ const width = (??image.Object.get("Width")).value;
+ debug.assert(width.Integer == 800);
+
+ const height = (??image.Object.get("Height")).value;
+ debug.assert(height.Integer == 600);
+
+ const title = (??image.Object.get("Title")).value;
+ debug.assert(mem.eql(u8, title.String, "View from 15th Floor"));
+
+ const animated = (??image.Object.get("Animated")).value;
+ debug.assert(animated.Bool == false);
+}