character literals: allow unicode escapes

Also make the documentation for character literals clearer. Closes #2089. See #2097.
author: Andrew Kelley <andrew@ziglang.org> 2019-03-23 17:35:21 -0400
committer: Andrew Kelley <andrew@ziglang.org> 2019-03-23 17:35:21 -0400
commit: 89953ec83d8afe4fed0fc9e3cdded09c7522bf86 (patch)
tree: 42f76e6da37b08e0022af060545c5e5d3f6bd476 /std
parent: 55cb9ef138c7cf0a23e7f852a82884612a3ca663 (diff)
download: zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.tar.gz
zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.zip
2 files changed, 28 insertions, 16 deletions
diff --git a/std/zig/parser_test.zig b/std/zig/parser_test.zig
index 508813759c..8b9c0c2d64 100644
--- a/std/zig/parser_test.zig
+++ b/std/zig/parser_test.zig
@@ -1,3 +1,10 @@
+test "zig fmt: character literal larger than u8" {
+    try testCanonical(
+        \\const x = '\U01f4a9';
+        \\
+    );
+}
+
 test "zig fmt: infix operator and then multiline string literal" {
     try testCanonical(
         \\const x = "" ++
diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index 2159371ccf..19d64514a1 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -236,8 +236,7 @@ pub const Tokenizer = struct {
         MultilineStringLiteralLine,
         CharLiteral,
         CharLiteralBackslash,
-        CharLiteralEscape1,
-        CharLiteralEscape2,
+        CharLiteralHexEscape,
         CharLiteralEnd,
         Backslash,
         Equal,
@@ -293,6 +292,8 @@ pub const Tokenizer = struct {
             .start = self.index,
             .end = undefined,
         };
+        var seen_escape_digits: usize = undefined;
+        var expected_escape_digits: usize = undefined;
         while (self.index < self.buffer.len) : (self.index += 1) {
             const c = self.buffer[self.index];
             switch (state) {
@@ -658,26 +659,31 @@ pub const Tokenizer = struct {
                         break;
                     },
                     'x' => {
-                        state = State.CharLiteralEscape1;
+                        state = State.CharLiteralHexEscape;
+                        seen_escape_digits = 0;
+                        expected_escape_digits = 2;
                     },
-                    else => {
-                        state = State.CharLiteralEnd;
+                    'u' => {
+                        state = State.CharLiteralHexEscape;
+                        seen_escape_digits = 0;
+                        expected_escape_digits = 4;
                     },
-                },
-
-                State.CharLiteralEscape1 => switch (c) {
-                    '0'...'9', 'a'...'z', 'A'...'F' => {
-                        state = State.CharLiteralEscape2;
+                    'U' => {
+                        state = State.CharLiteralHexEscape;
+                        seen_escape_digits = 0;
+                        expected_escape_digits = 6;
                     },
                     else => {
-                        result.id = Token.Id.Invalid;
-                        break;
+                        state = State.CharLiteralEnd;
                     },
                 },
 
-                State.CharLiteralEscape2 => switch (c) {
+                State.CharLiteralHexEscape => switch (c) {
                     '0'...'9', 'a'...'z', 'A'...'F' => {
-                        state = State.CharLiteralEnd;
+                        seen_escape_digits += 1;
+                        if (seen_escape_digits == expected_escape_digits) {
+                            state = State.CharLiteralEnd;
+                        }
                     },
                     else => {
                         result.id = Token.Id.Invalid;
@@ -1045,8 +1051,7 @@ pub const Tokenizer = struct {
                 State.Backslash,
                 State.CharLiteral,
                 State.CharLiteralBackslash,
-                State.CharLiteralEscape1,
-                State.CharLiteralEscape2,
+                State.CharLiteralHexEscape,
                 State.CharLiteralEnd,
                 State.StringLiteralBackslash,
                 State.LBracketStar,
author	Andrew Kelley <andrew@ziglang.org>	2019-03-23 17:35:21 -0400
committer	Andrew Kelley <andrew@ziglang.org>	2019-03-23 17:35:21 -0400
commit	89953ec83d8afe4fed0fc9e3cdded09c7522bf86 (patch)
tree	42f76e6da37b08e0022af060545c5e5d3f6bd476 /std
parent	55cb9ef138c7cf0a23e7f852a82884612a3ca663 (diff)
download	zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.tar.gz zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.zip