| field | value | date |
|---|---|---|
| author | daurnimator <quae@daurnimator.com> | 2019-12-29 17:46:32 +1100 |
| committer | daurnimator <quae@daurnimator.com> | 2020-02-19 23:16:35 +1100 |
| commit | d989396a3473836a9e2e269b2be9675574ead389 (patch) | |
| tree | 0bfe0ff21e33d9b188f9f766de420902a4886700 /lib/std/json.zig | |
| parent | 5a2060482042129e64af2b6281f8e4bbf1f2fe6e (diff) | |
| download | zig-d989396a3473836a9e2e269b2be9675574ead389.tar.gz, zig-d989396a3473836a9e2e269b2be9675574ead389.zip | |
std: add json.parse to automatically decode json into a struct
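A minimal usage sketch of the API introduced below (not part of the commit), assuming `parse`, `parseFree`, `ParseOptions`, and `TokenStream` are public in `std.json` as the diff shows; the `Config` struct and its fields are hypothetical:

```zig
const std = @import("std");

// Hypothetical caller-side type, for illustration only.
const Config = struct {
    port: u16,
    host: []const u8,
    debug: bool = false, // omitted in the JSON below, so the default is used
};

test "sketch: decode JSON into a struct" {
    const options = std.json.ParseOptions{ .allocator = std.testing.allocator };
    const config = try std.json.parse(Config, &std.json.TokenStream.init(
        \\{"port": 8080, "host": "localhost"}
    ), options);
    // `host` is an allocated slice, so release it with the same type and options.
    defer std.json.parseFree(Config, config, options);

    std.testing.expectEqual(@as(u16, 8080), config.port);
    std.testing.expectEqualSlices(u8, "localhost", config.host);
    std.testing.expectEqual(false, config.debug);
}
```

Scalar-only targets can pass `ParseOptions{}`; slices and single-item pointers need `.allocator` set, otherwise `parse` returns `error.AllocatorRequired`.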
Diffstat (limited to 'lib/std/json.zig')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | lib/std/json.zig | 552 |
1 file changed, 552 insertions, 0 deletions
diff --git a/lib/std/json.zig b/lib/std/json.zig
index 37ee20f2a2..830f492e74 100644
--- a/lib/std/json.zig
+++ b/lib/std/json.zig
@@ -19,6 +19,74 @@ const StringEscapes = union(enum) {
         },
     };
 
+/// Checks to see if a string matches what it would be as a json-encoded string
+/// Assumes that `encoded` is a well-formed json string
+fn encodesTo(decoded: []const u8, encoded: []const u8) bool {
+    var i: usize = 0;
+    var j: usize = 0;
+    while (i < decoded.len) {
+        if (j >= encoded.len) return false;
+        if (encoded[j] != '\\') {
+            if (decoded[i] != encoded[j]) return false;
+            j += 1;
+            i += 1;
+        } else {
+            const escape_type = encoded[j + 1];
+            if (escape_type != 'u') {
+                const t: u8 = switch (escape_type) {
+                    '\\' => '\\',
+                    '/' => '/',
+                    'n' => '\n',
+                    'r' => '\r',
+                    't' => '\t',
+                    'f' => 12,
+                    'b' => 8,
+                    '"' => '"',
+                    else => unreachable,
+                };
+                if (decoded[i] != t) return false;
+                j += 2;
+                i += 1;
+            } else {
+                var codepoint = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
+                j += 6;
+                if (codepoint >= 0xD800 and codepoint < 0xDC00) {
+                    // surrogate pair
+                    assert(encoded[j] == '\\');
+                    assert(encoded[j + 1] == 'u');
+                    const low_surrogate = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
+                    codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low_surrogate & 0x03ff));
+                    j += 6;
+                }
+                var buf: [4]u8 = undefined;
+                const len = std.unicode.utf8Encode(codepoint, &buf) catch unreachable;
+                if (i + len > decoded.len) return false;
+                if (!mem.eql(u8, decoded[i .. i + len], buf[0..len])) return false;
+                i += len;
+            }
+        }
+    }
+    assert(i == decoded.len);
+    assert(j == encoded.len);
+    return true;
+}
+
+test "encodesTo" {
+    // same
+    testing.expectEqual(true, encodesTo("false", "false"));
+    // totally different
+    testing.expectEqual(false, encodesTo("false", "true"));
+    // different lengths
+    testing.expectEqual(false, encodesTo("false", "other"));
+    // with escape
+    testing.expectEqual(true, encodesTo("\\", "\\\\"));
+    testing.expectEqual(true, encodesTo("with\nescape", "with\\nescape"));
+    // with unicode
+    testing.expectEqual(true, encodesTo("ą", "\\u0105"));
+    testing.expectEqual(true, encodesTo("😂", "\\ud83d\\ude02"));
+    testing.expectEqual(true, encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02"));
+}
+
 /// A single token slice into the parent string.
 ///
 /// Use `token.slice()` on the input at the current position to get the current slice.
@@ -1201,6 +1269,490 @@ pub const Value = union(enum) {
     }
 };
 
+pub const ParseOptions = struct {
+    allocator: ?*Allocator = null,
+
+    /// Behaviour when a duplicate field is encountered.
+    duplicate_field_behavior: enum {
+        UseFirst,
+        Error,
+        UseLast,
+    } = .Error,
+};
+
+fn parseInternal(comptime T: type, token: Token, tokens: *TokenStream, options: ParseOptions) !T {
+    switch (@typeInfo(T)) {
+        .Bool => {
+            return switch (token) {
+                .True => true,
+                .False => false,
+                else => error.UnexpectedToken,
+            };
+        },
+        .Float, .ComptimeFloat => {
+            const numberToken = switch (token) {
+                .Number => |n| n,
+                else => return error.UnexpectedToken,
+            };
+            return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1));
+        },
+        .Int, .ComptimeInt => {
+            const numberToken = switch (token) {
+                .Number => |n| n,
+                else => return error.UnexpectedToken,
+            };
+            if (!numberToken.is_integer) return error.UnexpectedToken;
+            return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10);
+        },
+        .Optional => |optionalInfo| {
+            if (token == .Null) {
+                return null;
+            } else {
+                return try parseInternal(optionalInfo.child, token, tokens, options);
+            }
+        },
+        .Enum => |enumInfo| {
+            switch (token) {
+                .Number => |numberToken| {
+                    if (!numberToken.is_integer) return error.UnexpectedToken;
+                    const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10);
+                    return try std.meta.intToEnum(T, n);
+                },
+                .String => |stringToken| {
+                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
+                    switch (stringToken.escapes) {
+                        .None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag,
+                        .Some => {
+                            inline for (enumInfo.fields) |field| {
+                                if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) {
+                                    return @field(T, field.name);
+                                }
+                            }
+                            return error.InvalidEnumTag;
+                        },
+                    }
+                },
+                else => return error.UnexpectedToken,
+            }
+        },
+        .Union => |unionInfo| {
+            if (unionInfo.tag_type) |_| {
+                // try each of the union fields until we find one that matches
+                inline for (unionInfo.fields) |u_field| {
+                    if (parseInternal(u_field.field_type, token, tokens, options)) |value| {
+                        return @unionInit(T, u_field.name, value);
+                    } else |err| {
+                        // Bubble up error.OutOfMemory
+                        // Parsing some types won't have OutOfMemory in their
+                        // error-sets, for the condition to be valid, merge it in.
+                        if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
+                        // otherwise continue through the `inline for`
+                    }
+                }
+                return error.NoUnionMembersMatched;
+            } else {
+                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
+            }
+        },
+        .Struct => |structInfo| {
+            switch (token) {
+                .ObjectBegin => {},
+                else => return error.UnexpectedToken,
+            }
+            var r: T = undefined;
+            var fields_seen = [_]bool{false} ** structInfo.fields.len;
+            errdefer {
+                inline for (structInfo.fields) |field, i| {
+                    if (fields_seen[i]) {
+                        parseFree(field.field_type, @field(r, field.name), options);
+                    }
+                }
+            }
+
+            while (true) {
+                switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) {
+                    .ObjectEnd => break,
+                    .String => |stringToken| {
+                        const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
+                        var found = false;
+                        inline for (structInfo.fields) |field, i| {
+                            // TODO: using switches here segfaults the compiler (#2727?)
+                            if ((stringToken.escapes == .None and mem.eql(u8, field.name, key_source_slice)) or (stringToken.escapes == .Some and (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)))) {
+                                // if (switch (stringToken.escapes) {
+                                //     .None => mem.eql(u8, field.name, key_source_slice),
+                                //     .Some => (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)),
+                                // }) {
+                                if (fields_seen[i]) {
+                                    // switch (options.duplicate_field_behavior) {
+                                    //     .UseFirst => {},
+                                    //     .Error => {},
+                                    //     .UseLast => {},
+                                    // }
+                                    if (options.duplicate_field_behavior == .UseFirst) {
+                                        break;
+                                    } else if (options.duplicate_field_behavior == .Error) {
+                                        return error.DuplicateJSONField;
+                                    } else if (options.duplicate_field_behavior == .UseLast) {
+                                        parseFree(field.field_type, @field(r, field.name), options);
+                                    }
+                                }
+                                @field(r, field.name) = try parse(field.field_type, tokens, options);
+                                fields_seen[i] = true;
+                                found = true;
+                                break;
+                            }
+                        }
+                        if (!found) return error.UnknownField;
+                    },
+                    else => return error.UnexpectedToken,
+                }
+            }
+            inline for (structInfo.fields) |field, i| {
+                if (!fields_seen[i]) {
+                    if (field.default_value) |default| {
+                        @field(r, field.name) = default;
+                    } else {
+                        return error.MissingField;
+                    }
+                }
+            }
+            return r;
+        },
+        .Array => |arrayInfo| {
+            switch (token) {
+                .ArrayBegin => {
+                    var r: T = undefined;
+                    var i: usize = 0;
+                    errdefer {
+                        while (true) : (i -= 1) {
+                            parseFree(arrayInfo.child, r[i], options);
+                            if (i == 0) break;
+                        }
+                    }
+                    while (i < r.len) : (i += 1) {
+                        r[i] = try parse(arrayInfo.child, tokens, options);
+                    }
+                    const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
+                    switch (tok) {
+                        .ArrayEnd => {},
+                        else => return error.UnexpectedToken,
+                    }
+                    return r;
+                },
+                .String => |stringToken| {
+                    if (arrayInfo.child != u8) return error.UnexpectedToken;
+                    var r: T = undefined;
+                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
+                    switch (stringToken.escapes) {
+                        .None => mem.copy(u8, &r, source_slice),
+                        .Some => try unescapeString(&r, source_slice),
+                    }
+                    return r;
+                },
+                else => return error.UnexpectedToken,
+            }
+        },
+        .Pointer => |ptrInfo| {
+            const allocator = options.allocator orelse return error.AllocatorRequired;
+            switch (ptrInfo.size) {
+                .One => {
+                    const r: T = try allocator.create(ptrInfo.child);
+                    r.* = try parseInternal(ptrInfo.child, token, tokens, options);
+                    return r;
+                },
+                .Slice => {
+                    switch (token) {
+                        .ArrayBegin => {
+                            var arraylist = std.ArrayList(ptrInfo.child).init(allocator);
+                            errdefer {
+                                while (arraylist.popOrNull()) |v| {
+                                    parseFree(ptrInfo.child, v, options);
+                                }
+                                arraylist.deinit();
+                            }
+
+                            while (true) {
+                                const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
+                                switch (tok) {
+                                    .ArrayEnd => break,
+                                    else => {},
+                                }
+
+                                try arraylist.ensureCapacity(arraylist.len + 1);
+                                const v = try parseInternal(ptrInfo.child, tok, tokens, options);
+                                arraylist.appendAssumeCapacity(v);
+                            }
+                            return arraylist.toOwnedSlice();
+                        },
+                        .String => |stringToken| {
+                            if (ptrInfo.child != u8) return error.UnexpectedToken;
+                            const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
+                            switch (stringToken.escapes) {
+                                .None => return mem.dupe(allocator, u8, source_slice),
+                                .Some => |some_escapes| {
+                                    const output = try allocator.alloc(u8, stringToken.decodedLength());
+                                    errdefer allocator.free(output);
+                                    try unescapeString(output, source_slice);
+                                    return output;
+                                },
+                            }
+                        },
+                        else => return error.UnexpectedToken,
+                    }
+                },
+                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
+            }
+        },
+        else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
+    }
+    unreachable;
+}
+
+pub fn parse(comptime T: type, tokens: *TokenStream, options: ParseOptions) !T {
+    const token = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
+    return parseInternal(T, token, tokens, options);
+}
+
+/// Releases resources created by `parse`.
+/// Should be called with the same type and `ParseOptions` that were passed to `parse`
+pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void {
+    switch (@typeInfo(T)) {
+        .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {},
+        .Optional => {
+            if (value) |v| {
+                return parseFree(@TypeOf(v), v, options);
+            }
+        },
+        .Union => |unionInfo| {
+            if (unionInfo.tag_type) |UnionTagType| {
+                inline for (unionInfo.fields) |u_field| {
+                    if (@enumToInt(@as(UnionTagType, value)) == u_field.enum_field.?.value) {
+                        parseFree(u_field.field_type, @field(value, u_field.name), options);
+                        break;
+                    }
+                }
+            } else {
+                unreachable;
+            }
+        },
+        .Struct => |structInfo| {
+            inline for (structInfo.fields) |field| {
+                parseFree(field.field_type, @field(value, field.name), options);
+            }
+        },
+        .Array => |arrayInfo| {
+            for (value) |v| {
+                parseFree(arrayInfo.child, v, options);
+            }
+        },
+        .Pointer => |ptrInfo| {
+            const allocator = options.allocator orelse unreachable;
+            switch (ptrInfo.size) {
+                .One => {
+                    parseFree(ptrInfo.child, value.*, options);
+                    allocator.destroy(value);
+                },
+                .Slice => {
+                    for (value) |v| {
+                        parseFree(ptrInfo.child, v, options);
+                    }
+                    allocator.free(value);
+                },
+                else => unreachable,
+            }
+        },
+        else => unreachable,
+    }
+}
+
+test "parse" {
+    testing.expectEqual(false, try parse(bool, &TokenStream.init("false"), ParseOptions{}));
+    testing.expectEqual(true, try parse(bool, &TokenStream.init("true"), ParseOptions{}));
+    testing.expectEqual(@as(u1, 1), try parse(u1, &TokenStream.init("1"), ParseOptions{}));
+    testing.expectError(error.Overflow, parse(u1, &TokenStream.init("50"), ParseOptions{}));
+    testing.expectEqual(@as(u64, 42), try parse(u64, &TokenStream.init("42"), ParseOptions{}));
+    testing.expectEqual(@as(f64, 42), try parse(f64, &TokenStream.init("42.0"), ParseOptions{}));
+    testing.expectEqual(@as(?bool, null), try parse(?bool, &TokenStream.init("null"), ParseOptions{}));
+    testing.expectEqual(@as(?bool, true), try parse(?bool, &TokenStream.init("true"), ParseOptions{}));
+
+    testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &TokenStream.init("\"foo\""), ParseOptions{}));
+    testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &TokenStream.init("[102, 111, 111]"), ParseOptions{}));
+}
+
+test "parse into enum" {
+    const T = extern enum {
+        Foo = 42,
+        Bar,
+        @"with\\escape",
+    };
+    testing.expectEqual(@as(T, .Foo), try parse(T, &TokenStream.init("\"Foo\""), ParseOptions{}));
+    testing.expectEqual(@as(T, .Foo), try parse(T, &TokenStream.init("42"), ParseOptions{}));
+    testing.expectEqual(@as(T, .@"with\\escape"), try parse(T, &TokenStream.init("\"with\\\\escape\""), ParseOptions{}));
+    testing.expectError(error.InvalidEnumTag, parse(T, &TokenStream.init("5"), ParseOptions{}));
+    testing.expectError(error.InvalidEnumTag, parse(T, &TokenStream.init("\"Qux\""), ParseOptions{}));
+}
+
+test "parse into that allocates a slice" {
+    testing.expectError(error.AllocatorRequired, parse([]u8, &TokenStream.init("\"foo\""), ParseOptions{}));
+
+    const options = ParseOptions{ .allocator = testing.allocator };
+    {
+        const r = try parse([]u8, &TokenStream.init("\"foo\""), options);
+        defer parseFree([]u8, r, options);
+        testing.expectEqualSlices(u8, "foo", r);
+    }
+    {
+        const r = try parse([]u8, &TokenStream.init("[102, 111, 111]"), options);
+        defer parseFree([]u8, r, options);
+        testing.expectEqualSlices(u8, "foo", r);
+    }
+    {
+        const r = try parse([]u8, &TokenStream.init("\"with\\\\escape\""), options);
+        defer parseFree([]u8, r, options);
+        testing.expectEqualSlices(u8, "with\\escape", r);
+    }
+}
+
+test "parse into tagged union" {
+    {
+        const T = union(enum) {
+            int: i32,
+            float: f64,
+            string: []const u8,
+        };
+        testing.expectEqual(T{ .float = 1.5 }, try parse(T, &TokenStream.init("1.5"), ParseOptions{}));
+    }
+
+    { // if union matches string member, fails with NoUnionMembersMatched rather than AllocatorRequired
+        // Note that this behaviour wasn't necessarily by design, but was
+        // what fell out of the implementation and may result in interesting
+        // API breakage if changed
+        const T = union(enum) {
+            int: i32,
+            float: f64,
+            string: []const u8,
+        };
+        testing.expectError(error.NoUnionMembersMatched, parse(T, &TokenStream.init("\"foo\""), ParseOptions{}));
+    }
+
+    { // failing allocations should be bubbled up instantly without trying next member
+        var fail_alloc = testing.FailingAllocator.init(testing.allocator, 0);
+        const options = ParseOptions{ .allocator = &fail_alloc.allocator };
+        const T = union(enum) {
+            // both fields here match the input
+            string: []const u8,
+            array: [3]u8,
+        };
+        testing.expectError(error.OutOfMemory, parse(T, &TokenStream.init("[1,2,3]"), options));
+    }
+
+    {
+        // if multiple matches possible, takes first option
+        const T = union(enum) {
+            x: u8,
+            y: u8,
+        };
+        testing.expectEqual(T{ .x = 42 }, try parse(T, &TokenStream.init("42"), ParseOptions{}));
+    }
+}
+
+test "parseFree descends into tagged union" {
+    var fail_alloc = testing.FailingAllocator.init(testing.allocator, 1);
+    const options = ParseOptions{ .allocator = &fail_alloc.allocator };
+    const T = union(enum) {
+        int: i32,
+        float: f64,
+        string: []const u8,
+    };
+    // use a string with unicode escape so we know result can't be a reference to global constant
+    const r = try parse(T, &TokenStream.init("\"with\\u0105unicode\""), options);
+    testing.expectEqual(@TagType(T).string, @as(@TagType(T), r));
+    testing.expectEqualSlices(u8, "withąunicode", r.string);
+    testing.expectEqual(@as(usize, 0), fail_alloc.deallocations);
+    parseFree(T, r, options);
+    testing.expectEqual(@as(usize, 1), fail_alloc.deallocations);
+}
+
+test "parse into struct with no fields" {
+    const T = struct {};
+    testing.expectEqual(T{}, try parse(T, &TokenStream.init("{}"), ParseOptions{}));
+}
+
+test "parse into struct with misc fields" {
+    @setEvalBranchQuota(10000);
+    const options = ParseOptions{ .allocator = testing.allocator };
+    const T = struct {
+        int: i64,
+        float: f64,
+        @"with\\escape": bool,
+        @"withąunicode😂": bool,
+        language: []const u8,
+        optional: ?bool,
+        default_field: i32 = 42,
+        static_array: [3]f64,
+        dynamic_array: []f64,
+
+        const Bar = struct {
+            nested: []const u8,
+        };
+        complex: Bar,
+
+        const Baz = struct {
+            foo: []const u8,
+        };
+        veryComplex: []Baz,
+
+        const Union = union(enum) {
+            x: u8,
+            float: f64,
+            string: []const u8,
+        };
+        a_union: Union,
+    };
+    const r = try parse(T, &TokenStream.init(
+        \\{
+        \\  "int": 420,
+        \\  "float": 3.14,
+        \\  "with\\escape": true,
+        \\  "with\u0105unicode\ud83d\ude02": false,
+        \\  "language": "zig",
+        \\  "optional": null,
+        \\  "static_array": [66.6, 420.420, 69.69],
+        \\  "dynamic_array": [66.6, 420.420, 69.69],
+        \\  "complex": {
+        \\    "nested": "zig"
+        \\  },
+        \\  "veryComplex": [
+        \\    {
+        \\      "foo": "zig"
+        \\    }, {
+        \\      "foo": "rocks"
+        \\    }
+        \\  ],
+        \\  "a_union": 100000
+        \\}
+    ), options);
+    defer parseFree(T, r, options);
+    testing.expectEqual(@as(i64, 420), r.int);
+    testing.expectEqual(@as(f64, 3.14), r.float);
+    testing.expectEqual(true, r.@"with\\escape");
+    testing.expectEqual(false, r.@"withąunicode😂");
+    testing.expectEqualSlices(u8, "zig", r.language);
+    testing.expectEqual(@as(?bool, null), r.optional);
+    testing.expectEqual(@as(i32, 42), r.default_field);
+    testing.expectEqual(@as(f64, 66.6), r.static_array[0]);
+    testing.expectEqual(@as(f64, 420.420), r.static_array[1]);
+    testing.expectEqual(@as(f64, 69.69), r.static_array[2]);
+    testing.expectEqual(@as(usize, 3), r.dynamic_array.len);
+    testing.expectEqual(@as(f64, 66.6), r.dynamic_array[0]);
+    testing.expectEqual(@as(f64, 420.420), r.dynamic_array[1]);
+    testing.expectEqual(@as(f64, 69.69), r.dynamic_array[2]);
+    testing.expectEqualSlices(u8, r.complex.nested, "zig");
+    testing.expectEqualSlices(u8, "zig", r.veryComplex[0].foo);
+    testing.expectEqualSlices(u8, "rocks", r.veryComplex[1].foo);
+    testing.expectEqual(T.Union{ .float = 100000 }, r.a_union);
+}
+
 /// A non-stream JSON parser which constructs a tree of Value's.
 pub const Parser = struct {
     allocator: *Allocator,
