stage2: improve compile errors from tokenizer

In order to not regress the quality of compile errors, some improvements had to be made.

 * std.zig.parseCharLiteral is improved to return more detailed parse failure information.
 * The tokenizer is improved to handle null bytes in the middle of strings, character literals, and line comments.
 * Validation of the number of unicode escape digits in string literals is moved to std.zig.parseStringLiteral rather than being handled in the tokenizer.
 * When a tokenizer error occurs, if the reported token has the 'invalid' tag, an error note is added to point to the invalid byte location. Further improvements would be:
   - Mention the expected set of allowed bytes at this location.
   - Display the invalid byte (if printable, print it; otherwise escape-print it).
author: Andrew Kelley <andrew@ziglang.org> 2021-07-01 00:14:58 -0700
committer: Andrew Kelley <andrew@ziglang.org> 2021-07-02 13:27:35 -0700
commit: 24c432608f6b07020fa0b18fc9c868ad6abd9b15 (patch)
tree: f9650d8c9aa36d6fddd45b1f70847304da16c0f5 /src/AstGen.zig
parent: 3f680abbe2c4d2eeefd0eb73b8af25d1768e6ceb (diff)
download: zig-24c432608f6b07020fa0b18fc9c868ad6abd9b15.tar.gz
zig-24c432608f6b07020fa0b18fc9c868ad6abd9b15.zip
1 file changed, 65 insertions, 9 deletions
diff --git a/src/AstGen.zig b/src/AstGen.zig
index 64f5f012f0..5b9851b0ae 100644
--- a/src/AstGen.zig
+++ b/src/AstGen.zig
@@ -6380,20 +6380,76 @@ fn charLiteral(gz: *GenZir, rl: ResultLoc, node: ast.Node.Index) !Zir.Inst.Ref {
     const main_token = main_tokens[node];
     const slice = tree.tokenSlice(main_token);
 
-    var bad_index: usize = undefined;
-    const value = std.zig.parseCharLiteral(slice, &bad_index) catch |err| switch (err) {
-        error.InvalidCharacter => {
-            const bad_byte = slice[bad_index];
+    switch (std.zig.parseCharLiteral(slice)) {
+        .success => |codepoint| {
+            const result = try gz.addInt(codepoint);
+            return rvalue(gz, rl, result, node);
+        },
+        .invalid_escape_character => |bad_index| {
             return astgen.failOff(
                 main_token,
                 @intCast(u32, bad_index),
-                "invalid character: '{c}'\n",
-                .{bad_byte},
+                "invalid escape character: '{c}'",
+                .{slice[bad_index]},
             );
         },
-    };
-    const result = try gz.addInt(value);
-    return rvalue(gz, rl, result, node);
+        .expected_hex_digit => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "expected hex digit, found '{c}'",
+                .{slice[bad_index]},
+            );
+        },
+        .empty_unicode_escape_sequence => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "empty unicode escape sequence",
+                .{},
+            );
+        },
+        .expected_hex_digit_or_rbrace => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "expected hex digit or '}}', found '{c}'",
+                .{slice[bad_index]},
+            );
+        },
+        .unicode_escape_overflow => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "unicode escape too large to be a valid codepoint",
+                .{},
+            );
+        },
+        .expected_lbrace => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "expected '{{', found '{c}",
+                .{slice[bad_index]},
+            );
+        },
+        .expected_end => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "expected ending single quote ('), found '{c}",
+                .{slice[bad_index]},
+            );
+        },
+        .invalid_character => |bad_index| {
+            return astgen.failOff(
+                main_token,
+                @intCast(u32, bad_index),
+                "invalid byte in character literal: '{c}'",
+                .{slice[bad_index]},
+            );
+        },
+    }
 }
 
 fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: ast.Node.Index) InnerError!Zir.Inst.Ref {
author	Andrew Kelley <andrew@ziglang.org>	2021-07-01 00:14:58 -0700
committer	Andrew Kelley <andrew@ziglang.org>	2021-07-02 13:27:35 -0700
commit	24c432608f6b07020fa0b18fc9c868ad6abd9b15 (patch)
tree	f9650d8c9aa36d6fddd45b1f70847304da16c0f5 /src/AstGen.zig
parent	3f680abbe2c4d2eeefd0eb73b8af25d1768e6ceb (diff)
download	zig-24c432608f6b07020fa0b18fc9c868ad6abd9b15.tar.gz zig-24c432608f6b07020fa0b18fc9c868ad6abd9b15.zip