diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2019-03-23 17:35:21 -0400 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2019-03-23 17:35:21 -0400 |
| commit | 89953ec83d8afe4fed0fc9e3cdded09c7522bf86 (patch) | |
| tree | 42f76e6da37b08e0022af060545c5e5d3f6bd476 /std | |
| parent | 55cb9ef138c7cf0a23e7f852a82884612a3ca663 (diff) | |
| download | zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.tar.gz zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.zip | |
character literals: allow unicode escapes
Also make the documentation for character literals clearer.
closes #2089
see #2097
Diffstat (limited to 'std')
| -rw-r--r-- | std/zig/parser_test.zig | 7 | ||||
| -rw-r--r-- | std/zig/tokenizer.zig | 37 |
2 files changed, 28 insertions, 16 deletions
diff --git a/std/zig/parser_test.zig b/std/zig/parser_test.zig index 508813759c..8b9c0c2d64 100644 --- a/std/zig/parser_test.zig +++ b/std/zig/parser_test.zig @@ -1,3 +1,10 @@ +test "zig fmt: character literal larger than u8" { + try testCanonical( + \\const x = '\U01f4a9'; + \\ + ); +} + test "zig fmt: infix operator and then multiline string literal" { try testCanonical( \\const x = "" ++ diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig index 2159371ccf..19d64514a1 100644 --- a/std/zig/tokenizer.zig +++ b/std/zig/tokenizer.zig @@ -236,8 +236,7 @@ pub const Tokenizer = struct { MultilineStringLiteralLine, CharLiteral, CharLiteralBackslash, - CharLiteralEscape1, - CharLiteralEscape2, + CharLiteralHexEscape, CharLiteralEnd, Backslash, Equal, @@ -293,6 +292,8 @@ pub const Tokenizer = struct { .start = self.index, .end = undefined, }; + var seen_escape_digits: usize = undefined; + var expected_escape_digits: usize = undefined; while (self.index < self.buffer.len) : (self.index += 1) { const c = self.buffer[self.index]; switch (state) { @@ -658,26 +659,31 @@ pub const Tokenizer = struct { break; }, 'x' => { - state = State.CharLiteralEscape1; + state = State.CharLiteralHexEscape; + seen_escape_digits = 0; + expected_escape_digits = 2; }, - else => { - state = State.CharLiteralEnd; + 'u' => { + state = State.CharLiteralHexEscape; + seen_escape_digits = 0; + expected_escape_digits = 4; }, - }, - - State.CharLiteralEscape1 => switch (c) { - '0'...'9', 'a'...'z', 'A'...'F' => { - state = State.CharLiteralEscape2; + 'U' => { + state = State.CharLiteralHexEscape; + seen_escape_digits = 0; + expected_escape_digits = 6; }, else => { - result.id = Token.Id.Invalid; - break; + state = State.CharLiteralEnd; }, }, - State.CharLiteralEscape2 => switch (c) { + State.CharLiteralHexEscape => switch (c) { '0'...'9', 'a'...'z', 'A'...'F' => { - state = State.CharLiteralEnd; + seen_escape_digits += 1; + if (seen_escape_digits == expected_escape_digits) { + state = 
State.CharLiteralEnd; + } }, else => { result.id = Token.Id.Invalid; @@ -1045,8 +1051,7 @@ pub const Tokenizer = struct { State.Backslash, State.CharLiteral, State.CharLiteralBackslash, - State.CharLiteralEscape1, - State.CharLiteralEscape2, + State.CharLiteralHexEscape, State.CharLiteralEnd, State.StringLiteralBackslash, State.LBracketStar, |
