diff options
| author | Phil Schumann <metaleap@users.noreply.github.com> | 2020-04-08 02:27:18 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-04-07 20:27:18 -0400 |
| commit | b109186dd5e11e3da2f0f49de370e2b8447e92a0 (patch) | |
| tree | cced2835d93f8c351c3d91e4dbd835428d92ec1f | |
| parent | 66b2477ab668447c6cbff68770de14ec8db991be (diff) | |
| download | zig-b109186dd5e11e3da2f0f49de370e2b8447e92a0.tar.gz zig-b109186dd5e11e3da2f0f49de370e2b8447e92a0.zip | |
std/zig/parse_string_literal.zig: add hex+unicode escapes (#4678)
| -rw-r--r-- | lib/std/zig/parse_string_literal.zig | 63 |
1 file changed, 56 insertions, 7 deletions
diff --git a/lib/std/zig/parse_string_literal.zig b/lib/std/zig/parse_string_literal.zig index a6bdff4a02..949940f550 100644 --- a/lib/std/zig/parse_string_literal.zig +++ b/lib/std/zig/parse_string_literal.zig @@ -19,17 +19,19 @@ pub fn parseStringLiteral( bytes: []const u8, bad_index: *usize, // populated if error.InvalidCharacter is returned ) ParseStringLiteralError![]u8 { - const first_index = if (bytes[0] == 'c') @as(usize, 2) else @as(usize, 1); - assert(bytes[bytes.len - 1] == '"'); + assert(bytes.len >= 2 and bytes[0] == '"' and bytes[bytes.len - 1] == '"'); var list = std.ArrayList(u8).init(allocator); errdefer list.deinit(); - const slice = bytes[first_index..]; + const slice = bytes[1..]; try list.ensureCapacity(slice.len - 1); var state = State.Start; - for (slice) |b, index| { + var index: usize = 0; + while (index < slice.len) : (index += 1) { + const b = slice[index]; + switch (state) { State.Start => switch (b) { '\\' => state = State.Backslash, @@ -41,9 +43,6 @@ pub fn parseStringLiteral( else => try list.append(b), }, State.Backslash => switch (b) { - 'x' => @panic("TODO"), - 'u' => @panic("TODO"), - 'U' => @panic("TODO"), 'n' => { try list.append('\n'); state = State.Start; @@ -60,10 +59,46 @@ pub fn parseStringLiteral( try list.append('\t'); state = State.Start; }, + '\'' => { + try list.append('\''); + state = State.Start; + }, '"' => { try list.append('"'); state = State.Start; }, + 'x' => { + // TODO: add more/better/broader tests for this. + const index_continue = index + 3; + if (slice.len >= index_continue) + if (std.fmt.parseUnsigned(u8, slice[index + 1 .. index_continue], 16)) |char| { + try list.append(char); + state = State.Start; + index = index_continue - 1; // loop-header increments again + continue; + } else |_| {}; + + bad_index.* = index; + return error.InvalidCharacter; + }, + 'u' => { + // TODO: add more/better/broader tests for this. 
+ if (slice.len > index + 2 and slice[index + 1] == '{') + if (std.mem.indexOfScalarPos(u8, slice[0..std.math.min(index + 9, slice.len)], index + 3, '}')) |index_end| { + const hex_str = slice[index + 2 .. index_end]; + if (std.fmt.parseUnsigned(u32, hex_str, 16)) |uint| { + if (uint <= 0x10ffff) { + try list.appendSlice(std.mem.toBytes(uint)[0..]); + state = State.Start; + index = index_end; // loop-header increments + continue; + } + } else |_| {} + }; + + bad_index.* = index; + return error.InvalidCharacter; + }, else => { bad_index.* = index; return error.InvalidCharacter; @@ -74,3 +109,17 @@ pub fn parseStringLiteral( } unreachable; } + +test "parseStringLiteral" { + const expect = std.testing.expect; + const eql = std.mem.eql; + + var fixed_buf_mem: [32]u8 = undefined; + var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(fixed_buf_mem[0..]); + var alloc = &fixed_buf_alloc.allocator; + var bad_index: usize = undefined; + + expect(eql(u8, "foo", try parseStringLiteral(alloc, "\"foo\"", &bad_index))); + expect(eql(u8, "foo", try parseStringLiteral(alloc, "\"f\x6f\x6f\"", &bad_index))); + expect(eql(u8, "f💯", try parseStringLiteral(alloc, "\"f\u{1f4af}\"", &bad_index))); +} |
