2 files changed, 28 insertions, 16 deletions
diff --git a/std/zig/parser_test.zig b/std/zig/parser_test.zig
index 508813759c..8b9c0c2d64 100644
--- a/std/zig/parser_test.zig
+++ b/std/zig/parser_test.zig
@@ -1,3 +1,10 @@
+test "zig fmt: character literal larger than u8" { // input uses a six-hex-digit '\U' escape (0x01f4a9), which does not fit in a u8
+    try testCanonical( // NOTE(review): testCanonical is defined outside this hunk; presumably checks the formatter leaves this source unchanged — confirm
+        \\const x = '\U01f4a9';
+        \\
+    );
+}
+
 test "zig fmt: infix operator and then multiline string literal" {
     try testCanonical(
         \\const x = "" ++
diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index 2159371ccf..19d64514a1 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -236,8 +236,7 @@ pub const Tokenizer = struct {
         MultilineStringLiteralLine,
         CharLiteral,
         CharLiteralBackslash,
-        CharLiteralEscape1,
-        CharLiteralEscape2,
+        CharLiteralHexEscape,
         CharLiteralEnd,
         Backslash,
         Equal,
@@ -293,6 +292,8 @@ pub const Tokenizer = struct {
             .start = self.index,
             .end = undefined,
         };
+        var seen_escape_digits: usize = undefined;
+        var expected_escape_digits: usize = undefined;
         while (self.index < self.buffer.len) : (self.index += 1) {
             const c = self.buffer[self.index];
             switch (state) {
@@ -658,26 +659,31 @@ pub const Tokenizer = struct {
                         break;
                     },
                     'x' => {
-                        state = State.CharLiteralEscape1;
+                        state = State.CharLiteralHexEscape;
+                        seen_escape_digits = 0;
+                        expected_escape_digits = 2;
                     },
-                    else => {
-                        state = State.CharLiteralEnd;
+                    'u' => {
+                        state = State.CharLiteralHexEscape;
+                        seen_escape_digits = 0;
+                        expected_escape_digits = 4;
                     },
-                },
-
-                State.CharLiteralEscape1 => switch (c) {
-                    '0'...'9', 'a'...'z', 'A'...'F' => {
-                        state = State.CharLiteralEscape2;
+                    'U' => {
+                        state = State.CharLiteralHexEscape;
+                        seen_escape_digits = 0;
+                        expected_escape_digits = 6;
                     },
                     else => {
-                        result.id = Token.Id.Invalid;
-                        break;
+                        state = State.CharLiteralEnd;
                     },
                 },
 
-                State.CharLiteralEscape2 => switch (c) {
+                State.CharLiteralHexEscape => switch (c) {
                     '0'...'9', 'a'...'z', 'A'...'F' => {
-                        state = State.CharLiteralEnd;
+                        seen_escape_digits += 1;
+                        if (seen_escape_digits == expected_escape_digits) {
+                            state = State.CharLiteralEnd;
+                        }
                     },
                     else => {
                         result.id = Token.Id.Invalid;
@@ -1045,8 +1051,7 @@ pub const Tokenizer = struct {
                 State.Backslash,
                 State.CharLiteral,
                 State.CharLiteralBackslash,
-                State.CharLiteralEscape1,
-                State.CharLiteralEscape2,
+                State.CharLiteralHexEscape,
                 State.CharLiteralEnd,
                 State.StringLiteralBackslash,
                 State.LBracketStar,