| field | value | date |
|---|---|---|
| author | daurnimator <quae@daurnimator.com> | 2019-12-29 17:46:32 +1100 |
| committer | daurnimator <quae@daurnimator.com> | 2020-02-19 23:16:35 +1100 |
| commit | d989396a3473836a9e2e269b2be9675574ead389 (patch) | |
| tree | 0bfe0ff21e33d9b188f9f766de420902a4886700 /lib/std/json.zig | |
| parent | 5a2060482042129e64af2b6281f8e4bbf1f2fe6e (diff) | |
| download | zig-d989396a3473836a9e2e269b2be9675574ead389.tar.gz, zig-d989396a3473836a9e2e269b2be9675574ead389.zip | |
std: add json.parse to automatically decode json into a struct
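A minimal usage sketch of the API introduced below (not part of the commit), assuming `parse`, `parseFree`, `ParseOptions`, and `TokenStream` are public in `std.json` as the diff shows; the `Config` struct and its fields are hypothetical:

```zig
const std = @import("std");

// Hypothetical caller-side type, for illustration only.
const Config = struct {
    port: u16,
    host: []const u8,
    debug: bool = false, // omitted in the JSON below, so the default is used
};

test "sketch: decode JSON into a struct" {
    const options = std.json.ParseOptions{ .allocator = std.testing.allocator };
    const config = try std.json.parse(Config, &std.json.TokenStream.init(
        \\{"port": 8080, "host": "localhost"}
    ), options);
    // `host` is an allocated slice, so release it with the same type and options.
    defer std.json.parseFree(Config, config, options);

    std.testing.expectEqual(@as(u16, 8080), config.port);
    std.testing.expectEqualSlices(u8, "localhost", config.host);
    std.testing.expectEqual(false, config.debug);
}
```

Scalar-only targets can pass `ParseOptions{}`; slices and single-item pointers need `.allocator` set, otherwise `parse` returns `error.AllocatorRequired`.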
Diffstat (limited to 'lib/std/json.zig')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | lib/std/json.zig | 552 |
1 file changed, 552 insertions, 0 deletions
diff --git a/lib/std/json.zig b/lib/std/json.zig
index 37ee20f2a2..830f492e74 100644
--- a/lib/std/json.zig
+++ b/lib/std/json.zig
@@ -19,6 +19,74 @@ const StringEscapes = union(enum) {
         },
     };
 
+/// Checks to see if a string matches what it would be as a json-encoded string
+/// Assumes that `encoded` is a well-formed json string
+fn encodesTo(decoded: []const u8, encoded: []const u8) bool {
+    var i: usize = 0;
+    var j: usize = 0;
+    while (i < decoded.len) {
+        if (j >= encoded.len) return false;
+        if (encoded[j] != '\\') {
+            if (decoded[i] != encoded[j]) return false;
+            j += 1;
+            i += 1;
+        } else {
+            const escape_type = encoded[j + 1];
+            if (escape_type != 'u') {
+                const t: u8 = switch (escape_type) {
+                    '\\' => '\\',
+                    '/' => '/',
+                    'n' => '\n',
+                    'r' => '\r',
+                    't' => '\t',
+                    'f' => 12,
+                    'b' => 8,
+                    '"' => '"',
+                    else => unreachable,
+                };
+                if (decoded[i] != t) return false;
+                j += 2;
+                i += 1;
+            } else {
+                var codepoint = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
+                j += 6;
+                if (codepoint >= 0xD800 and codepoint < 0xDC00) {
+                    // surrogate pair
+                    assert(encoded[j] == '\\');
+                    assert(encoded[j + 1] == 'u');
+                    const low_surrogate = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
+                    codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low_surrogate & 0x03ff));
+                    j += 6;
+                }
+                var buf: [4]u8 = undefined;
+                const len = std.unicode.utf8Encode(codepoint, &buf) catch unreachable;
+                if (i + len > decoded.len) return false;
+                if (!mem.eql(u8, decoded[i .. i + len], buf[0..len])) return false;
+                i += len;
+            }
+        }
+    }
+    assert(i == decoded.len);
+    assert(j == encoded.len);
+    return true;
+}
+
+test "encodesTo" {
+    // same
+    testing.expectEqual(true, encodesTo("false", "false"));
+    // totally different
+    testing.expectEqual(false, encodesTo("false", "true"));
+    // different lengths
+    testing.expectEqual(false, encodesTo("false", "other"));
+    // with escape
+    testing.expectEqual(true, encodesTo("\\", "\\\\"));
+    testing.expectEqual(true, encodesTo("with\nescape", "with\\nescape"));
+    // with unicode
+    testing.expectEqual(true, encodesTo("ą", "\\u0105"));
+    testing.expectEqual(true, encodesTo("😂", "\\ud83d\\ude02"));
+    testing.expectEqual(true, encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02"));
+}
+
 /// A single token slice into the parent string.
 ///
 /// Use `token.slice()` on the input at the current position to get the current slice.
@@ -1201,6 +1269,490 @@ pub const Value = union(enum) {
     }
 };
 
+pub const ParseOptions = struct {
+    allocator: ?*Allocator = null,
+
+    /// Behaviour when a duplicate field is encountered.
+    duplicate_field_behavior: enum {
+        UseFirst,
+        Error,
+        UseLast,
+    } = .Error,
+};
+
+fn parseInternal(comptime T: type, token: Token, tokens: *TokenStream, options: ParseOptions) !T {
+    switch (@typeInfo(T)) {
+        .Bool => {
+            return switch (token) {
+                .True => true,
+                .False => false,
+                else => error.UnexpectedToken,
+            };
+        },
+        .Float, .ComptimeFloat => {
+            const numberToken = switch (token) {
+                .Number => |n| n,
+                else => return error.UnexpectedToken,
+            };
+            return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1));
+        },
+        .Int, .ComptimeInt => {
+            const numberToken = switch (token) {
+                .Number => |n| n,
+                else => return error.UnexpectedToken,
+            };
+            if (!numberToken.is_integer) return error.UnexpectedToken;
+            return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10);
+        },
+        .Optional => |optionalInfo| {
+            if (token == .Null) {
+                return null;
+            } else {
+                return try parseInternal(optionalInfo.child, token, tokens, options);
+            }
+        },
+        .Enum => |enumInfo| {
+            switch (token) {
+                .Number => |numberToken| {
+                    if (!numberToken.is_integer) return error.UnexpectedToken;
+                    const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10);
+                    return try std.meta.intToEnum(T, n);
+                },
+                .String => |stringToken| {
+                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
+                    switch (stringToken.escapes) {
+                        .None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag,
+                        .Some => {
+                            inline for (enumInfo.fields) |field| {
+                                if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) {
+                                    return @field(T, field.name);
+                                }
+                            }
+                            return error.InvalidEnumTag;
+                        },
+                    }
+                },
+                else => return error.UnexpectedToken,
+            }
+        },
+        .Union => |unionInfo| {
+            if (unionInfo.tag_type) |_| {
+                // try each of the union fields until we find one that matches
+                inline for (unionInfo.fields) |u_field| {
+                    if (parseInternal(u_field.field_type, token, tokens, options)) |value| {
+                        return @unionInit(T, u_field.name, value);
+                    } else |err| {
+                        // Bubble up error.OutOfMemory
+                        // Parsing some types won't have OutOfMemory in their
+                        // error-sets, for the condition to be valid, merge it in.
+                        if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
+                        // otherwise continue through the `inline for`
+                    }
+                }
+                return error.NoUnionMembersMatched;
+            } else {
+                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
+            }
+        },
+        .Struct => |structInfo| {
+            switch (token) {
+                .ObjectBegin => {},
+                else => return error.UnexpectedToken,
+            }
+            var r: T = undefined;
+            var fields_seen = [_]bool{false} ** structInfo.fields.len;
+            errdefer {
+                inline for (structInfo.fields) |field, i| {
+                    if (fields_seen[i]) {
+                        parseFree(field.field_type, @field(r, field.name), options);
+                    }
+                }
+            }
+
+            while (true) {
+                switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) {
+                    .ObjectEnd => break,
+                    .String => |stringToken| {
+                        const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
+                        var found = false;
+                        inline for (structInfo.fields) |field, i| {
+                            // TODO: using switches here segfaults the compiler (#2727?)
+                            if ((stringToken.escapes == .None and mem.eql(u8, field.name, key_source_slice)) or (stringToken.escapes == .Some and (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)))) {
+                                // if (switch (stringToken.escapes) {
+                                //     .None => mem.eql(u8, field.name, key_source_slice),
+                                //     .Some => (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)),
+                                // }) {
+                                if (fields_seen[i]) {
+                                    // switch (options.duplicate_field_behavior) {
+                                    //     .UseFirst => {},
+                                    //     .Error => {},
+                                    //     .UseLast => {},
+                                    // }
+                                    if (options.duplicate_field_behavior == .UseFirst) {
+                                        break;
+                                    } else if (options.duplicate_field_behavior == .Error) {
+                                        return error.DuplicateJSONField;
+                                    } else if (options.duplicate_field_behavior == .UseLast) {
+                                        parseFree(field.field_type, @field(r, field.name), options);
+                                    }
+                                }
+                                @field(r, field.name) = try parse(field.field_type, tokens, options);
+                                fields_seen[i] = true;
+                                found = true;
+                                break;
+                            }
+                        }
+                        if (!found) return error.UnknownField;
+                    },
+                    else => return error.UnexpectedToken,
+                }
+            }
+            inline for (structInfo.fields) |field, i| {
+                if (!fields_seen[i]) {
+                    if (field.default_value) |default| {
+                        @field(r, field.name) = default;
+                    } else {
+                        return error.MissingField;
+                    }
+                }
+            }
+            return r;
+        },
+        .Array => |arrayInfo| {
+            switch (token) {
+                .ArrayBegin => {
+                    var r: T = undefined;
+                    var i: usize = 0;
+                    errdefer {
+                        while (true) : (i -= 1) {
+                            parseFree(arrayInfo.child, r[i], options);
+                            if (i == 0) break;
+                        }
+                    }
+                    while (i < r.len) : (i += 1) {
+                        r[i] = try parse(arrayInfo.child, tokens, options);
+                    }
+                    const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
+                    switch (tok) {
+                        .ArrayEnd => {},
+                        else => return error.UnexpectedToken,
+                    }
+                    return r;
+                },
+                .String => |stringToken| {
+                    if (arrayInfo.child != u8) return error.UnexpectedToken;
+                    var r: T = undefined;
+                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
+                    switch (stringToken.escapes) {
+                        .None => mem.copy(u8, &r, source_slice),
+                        .Some => try unescapeString(&r, source_slice),
+                    }
+                    return r;
+                },
+                else => return error.UnexpectedToken,
+            }
+        },
+        .Pointer => |ptrInfo| {
+            const allocator = options.allocator orelse return error.AllocatorRequired;
+            switch (ptrInfo.size) {
+                .One => {
+                    const r: T = try allocator.create(ptrInfo.child);
+                    r.* = try parseInternal(ptrInfo.child, token, tokens, options);
+                    return r;
+                },
+                .Slice => {
+                    switch (token) {
+                        .ArrayBegin => {
+                            var arraylist = std.ArrayList(ptrInfo.child).init(allocator);
+                            errdefer {
+                                while (arraylist.popOrNull()) |v| {
+                                    parseFree(ptrInfo.child, v, options);
+                                }
+                                arraylist.deinit();
+                            }
+
+                            while (true) {
+                                const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
+                                switch (tok) {
+                                    .ArrayEnd => break,
+                                    else => {},
+                                }
+
+                                try arraylist.ensureCapacity(arraylist.len + 1);
+                                const v = try parseInternal(ptrInfo.child, tok, tokens, options);
+                                arraylist.appendAssumeCapacity(v);
+                            }
+                            return arraylist.toOwnedSlice();
+                        },
+                        .String => |stringToken| {
+                            if (ptrInfo.child != u8) return error.UnexpectedToken;
+                            const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
+                            switch (stringToken.escapes) {
+                                .None => return mem.dupe(allocator, u8, source_slice),
+                                .Some => |some_escapes| {
+                                    const output = try allocator.alloc(u8, stringToken.decodedLength());
+                                    errdefer allocator.free(output);
+                                    try unescapeString(output, source_slice);
+                                    return output;
+                                },
+                            }
+                        },
+                        else => return error.UnexpectedToken,
+                    }
+                },
+                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
+            }
+        },
+        else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
+    }
+    unreachable;
+}
+
+pub fn parse(comptime T: type, tokens: *TokenStream, options: ParseOptions) !T {
+    const token = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
+    return parseInternal(T, token, tokens, options);
+}
+
+/// Releases resources created by `parse`.
+/// Should be called with the same type and `ParseOptions` that were passed to `parse`
+pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void {
+    switch (@typeInfo(T)) {
+        .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {},
+        .Optional => {
+            if (value) |v| {
+                return parseFree(@TypeOf(v), v, options);
+            }
+        },
+        .Union => |unionInfo| {
+            if (unionInfo.tag_type) |UnionTagType| {
+                inline for (unionInfo.fields) |u_field| {
+                    if (@enumToInt(@as(UnionTagType, value)) == u_field.enum_field.?.value) {
+                        parseFree(u_field.field_type, @field(value, u_field.name), options);
+                        break;
+                    }
+                }
+            } else {
+                unreachable;
+            }
+        },
+        .Struct => |structInfo| {
+            inline for (structInfo.fields) |field| {
+                parseFree(field.field_type, @field(value, field.name), options);
+            }
+        },
+        .Array => |arrayInfo| {
+            for (value) |v| {
+                parseFree(arrayInfo.child, v, options);
+            }
+        },
+        .Pointer => |ptrInfo| {
+            const allocator = options.allocator orelse unreachable;
+            switch (ptrInfo.size) {
+                .One => {
+                    parseFree(ptrInfo.child, value.*, options);
+                    allocator.destroy(value);
+                },
+                .Slice => {
+                    for (value) |v| {
+                        parseFree(ptrInfo.child, v, options);
+                    }
+                    allocator.free(value);
+                },
+                else => unreachable,
+            }
+        },
+        else => unreachable,
+    }
+}
+
+test "parse" {
+    testing.expectEqual(false, try parse(bool, &TokenStream.init("false"), ParseOptions{}));
+    testing.expectEqual(true, try parse(bool, &TokenStream.init("true"), ParseOptions{}));
+    testing.expectEqual(@as(u1, 1), try parse(u1, &TokenStream.init("1"), ParseOptions{}));
+    testing.expectError(error.Overflow, parse(u1, &TokenStream.init("50"), ParseOptions{}));
+    testing.expectEqual(@as(u64, 42), try parse(u64, &TokenStream.init("42"), ParseOptions{}));
+    testing.expectEqual(@as(f64, 42), try parse(f64, &TokenStream.init("42.0"), ParseOptions{}));
+    testing.expectEqual(@as(?bool, null), try parse(?bool, &TokenStream.init("null"), ParseOptions{}));
+    testing.expectEqual(@as(?bool, true), try parse(?bool, &TokenStream.init("true"), ParseOptions{}));
+
+    testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &TokenStream.init("\"foo\""), ParseOptions{}));
+    testing.expectEqual(@as([3]u8, "foo".*), try parse([3]u8, &TokenStream.init("[102, 111, 111]"), ParseOptions{}));
+}
+
+test "parse into enum" {
+    const T = extern enum {
+        Foo = 42,
+        Bar,
+        @"with\\escape",
+    };
+    testing.expectEqual(@as(T, .Foo), try parse(T, &TokenStream.init("\"Foo\""), ParseOptions{}));
+    testing.expectEqual(@as(T, .Foo), try parse(T, &TokenStream.init("42"), ParseOptions{}));
+    testing.expectEqual(@as(T, .@"with\\escape"), try parse(T, &TokenStream.init("\"with\\\\escape\""), ParseOptions{}));
+    testing.expectError(error.InvalidEnumTag, parse(T, &TokenStream.init("5"), ParseOptions{}));
+    testing.expectError(error.InvalidEnumTag, parse(T, &TokenStream.init("\"Qux\""), ParseOptions{}));
+}
+
+test "parse into that allocates a slice" {
+    testing.expectError(error.AllocatorRequired, parse([]u8, &TokenStream.init("\"foo\""), ParseOptions{}));
+
+    const options = ParseOptions{ .allocator = testing.allocator };
+    {
+        const r = try parse([]u8, &TokenStream.init("\"foo\""), options);
+        defer parseFree([]u8, r, options);
+        testing.expectEqualSlices(u8, "foo", r);
+    }
+    {
+        const r = try parse([]u8, &TokenStream.init("[102, 111, 111]"), options);
+        defer parseFree([]u8, r, options);
+        testing.expectEqualSlices(u8, "foo", r);
+    }
+    {
+        const r = try parse([]u8, &TokenStream.init("\"with\\\\escape\""), options);
+        defer parseFree([]u8, r, options);
+        testing.expectEqualSlices(u8, "with\\escape", r);
+    }
+}
+
+test "parse into tagged union" {
+    {
+        const T = union(enum) {
+            int: i32,
+            float: f64,
+            string: []const u8,
+        };
+        testing.expectEqual(T{ .float = 1.5 }, try parse(T, &TokenStream.init("1.5"), ParseOptions{}));
+    }
+
+    { // if union matches string member, fails with NoUnionMembersMatched rather than AllocatorRequired
+        // Note that this behaviour wasn't necessarily by design, but was
+        // what fell out of the implementation and may result in interesting
+        // API breakage if changed
+        const T = union(enum) {
+            int: i32,
+            float: f64,
+            string: []const u8,
+        };
+        testing.expectError(error.NoUnionMembersMatched, parse(T, &TokenStream.init("\"foo\""), ParseOptions{}));
+    }
+
+    { // failing allocations should be bubbled up instantly without trying next member
+        var fail_alloc = testing.FailingAllocator.init(testing.allocator, 0);
+        const options = ParseOptions{ .allocator = &fail_alloc.allocator };
+        const T = union(enum) {
+            // both fields here match the input
+            string: []const u8,
+            array: [3]u8,
+        };
+        testing.expectError(error.OutOfMemory, parse(T, &TokenStream.init("[1,2,3]"), options));
+    }
+
+    {
+        // if multiple matches possible, takes first option
+        const T = union(enum) {
+            x: u8,
+            y: u8,
+        };
+        testing.expectEqual(T{ .x = 42 }, try parse(T, &TokenStream.init("42"), ParseOptions{}));
+    }
+}
+
+test "parseFree descends into tagged union" {
+    var fail_alloc = testing.FailingAllocator.init(testing.allocator, 1);
+    const options = ParseOptions{ .allocator = &fail_alloc.allocator };
+    const T = union(enum) {
+        int: i32,
+        float: f64,
+        string: []const u8,
+    };
+    // use a string with unicode escape so we know result can't be a reference to global constant
+    const r = try parse(T, &TokenStream.init("\"with\\u0105unicode\""), options);
+    testing.expectEqual(@TagType(T).string, @as(@TagType(T), r));
+    testing.expectEqualSlices(u8, "withąunicode", r.string);
+    testing.expectEqual(@as(usize, 0), fail_alloc.deallocations);
+    parseFree(T, r, options);
+    testing.expectEqual(@as(usize, 1), fail_alloc.deallocations);
+}
+
+test "parse into struct with no fields" {
+    const T = struct {};
+    testing.expectEqual(T{}, try parse(T, &TokenStream.init("{}"), ParseOptions{}));
+}
+
+test "parse into struct with misc fields" {
+    @setEvalBranchQuota(10000);
+    const options = ParseOptions{ .allocator = testing.allocator };
+    const T = struct {
+        int: i64,
+        float: f64,
+        @"with\\escape": bool,
+        @"withąunicode😂": bool,
+        language: []const u8,
+        optional: ?bool,
+        default_field: i32 = 42,
+        static_array: [3]f64,
+        dynamic_array: []f64,
+
+        const Bar = struct {
+            nested: []const u8,
+        };
+        complex: Bar,
+
+        const Baz = struct {
+            foo: []const u8,
+        };
+        veryComplex: []Baz,
+
+        const Union = union(enum) {
+            x: u8,
+            float: f64,
+            string: []const u8,
+        };
+        a_union: Union,
+    };
+    const r = try parse(T, &TokenStream.init(
+        \\{
+        \\  "int": 420,
+        \\  "float": 3.14,
+        \\  "with\\escape": true,
+        \\  "with\u0105unicode\ud83d\ude02": false,
+        \\  "language": "zig",
+        \\  "optional": null,
+        \\  "static_array": [66.6, 420.420, 69.69],
+        \\  "dynamic_array": [66.6, 420.420, 69.69],
+        \\  "complex": {
+        \\    "nested": "zig"
+        \\  },
+        \\  "veryComplex": [
+        \\    {
+        \\      "foo": "zig"
+        \\    }, {
+        \\      "foo": "rocks"
+        \\    }
+        \\  ],
+        \\  "a_union": 100000
+        \\}
+    ), options);
+    defer parseFree(T, r, options);
+    testing.expectEqual(@as(i64, 420), r.int);
+    testing.expectEqual(@as(f64, 3.14), r.float);
+    testing.expectEqual(true, r.@"with\\escape");
+    testing.expectEqual(false, r.@"withąunicode😂");
+    testing.expectEqualSlices(u8, "zig", r.language);
+    testing.expectEqual(@as(?bool, null), r.optional);
+    testing.expectEqual(@as(i32, 42), r.default_field);
+    testing.expectEqual(@as(f64, 66.6), r.static_array[0]);
+    testing.expectEqual(@as(f64, 420.420), r.static_array[1]);
+    testing.expectEqual(@as(f64, 69.69), r.static_array[2]);
+    testing.expectEqual(@as(usize, 3), r.dynamic_array.len);
+    testing.expectEqual(@as(f64, 66.6), r.dynamic_array[0]);
+    testing.expectEqual(@as(f64, 420.420), r.dynamic_array[1]);
+    testing.expectEqual(@as(f64, 69.69), r.dynamic_array[2]);
+    testing.expectEqualSlices(u8, r.complex.nested, "zig");
+    testing.expectEqualSlices(u8, "zig", r.veryComplex[0].foo);
+    testing.expectEqualSlices(u8, "rocks", r.veryComplex[1].foo);
+    testing.expectEqual(T.Union{ .float = 100000 }, r.a_union);
+}
+
 /// A non-stream JSON parser which constructs a tree of Value's.
 pub const Parser = struct {
     allocator: *Allocator,
