aboutsummaryrefslogtreecommitdiff
path: root/src/AstGen.zig
diff options
context:
space:
mode:
author: Andrew Kelley <andrew@ziglang.org> 2021-07-01 00:14:58 -0700
committer: Andrew Kelley <andrew@ziglang.org> 2021-07-02 13:27:35 -0700
commit: 24c432608f6b07020fa0b18fc9c868ad6abd9b15 (patch)
tree: f9650d8c9aa36d6fddd45b1f70847304da16c0f5 /src/AstGen.zig
parent: 3f680abbe2c4d2eeefd0eb73b8af25d1768e6ceb (diff)
download: zig-24c432608f6b07020fa0b18fc9c868ad6abd9b15.tar.gz
          zig-24c432608f6b07020fa0b18fc9c868ad6abd9b15.zip
stage2: improve compile errors from tokenizer
In order to not regress the quality of compile errors, some improvements had to be made.

 * std.zig.parseCharLiteral is improved to return more detailed parse failure information.
 * The tokenizer is improved to handle null bytes in the middle of strings, character literals, and line comments.
 * Validation of the number of unicode escape digits in string literals is moved to std.zig.parseStringLiteral rather than being handled in the tokenizer.
 * When a tokenizer error occurs, if the reported token has the 'invalid' tag, an error note is added to point to the invalid byte location. Further improvements would be:
   - Mention the expected set of allowed bytes at this location.
   - Display the invalid byte (if printable, print it; otherwise escape-print it).
Diffstat (limited to 'src/AstGen.zig')
-rw-r--r-- src/AstGen.zig 74
1 file changed, 65 insertions, 9 deletions
diff --git a/src/AstGen.zig b/src/AstGen.zig
index 64f5f012f0..5b9851b0ae 100644
--- a/src/AstGen.zig
+++ b/src/AstGen.zig
@@ -6380,20 +6380,76 @@ fn charLiteral(gz: *GenZir, rl: ResultLoc, node: ast.Node.Index) !Zir.Inst.Ref {
const main_token = main_tokens[node];
const slice = tree.tokenSlice(main_token);
- var bad_index: usize = undefined;
- const value = std.zig.parseCharLiteral(slice, &bad_index) catch |err| switch (err) {
- error.InvalidCharacter => {
- const bad_byte = slice[bad_index];
+ switch (std.zig.parseCharLiteral(slice)) {
+ .success => |codepoint| {
+ const result = try gz.addInt(codepoint);
+ return rvalue(gz, rl, result, node);
+ },
+ .invalid_escape_character => |bad_index| {
return astgen.failOff(
main_token,
@intCast(u32, bad_index),
- "invalid character: '{c}'\n",
- .{bad_byte},
+ "invalid escape character: '{c}'",
+ .{slice[bad_index]},
);
},
- };
- const result = try gz.addInt(value);
- return rvalue(gz, rl, result, node);
+ .expected_hex_digit => |bad_index| {
+ return astgen.failOff(
+ main_token,
+ @intCast(u32, bad_index),
+ "expected hex digit, found '{c}'",
+ .{slice[bad_index]},
+ );
+ },
+ .empty_unicode_escape_sequence => |bad_index| {
+ return astgen.failOff(
+ main_token,
+ @intCast(u32, bad_index),
+ "empty unicode escape sequence",
+ .{},
+ );
+ },
+ .expected_hex_digit_or_rbrace => |bad_index| {
+ return astgen.failOff(
+ main_token,
+ @intCast(u32, bad_index),
+ "expected hex digit or '}}', found '{c}'",
+ .{slice[bad_index]},
+ );
+ },
+ .unicode_escape_overflow => |bad_index| {
+ return astgen.failOff(
+ main_token,
+ @intCast(u32, bad_index),
+ "unicode escape too large to be a valid codepoint",
+ .{},
+ );
+ },
+ .expected_lbrace => |bad_index| {
+ return astgen.failOff(
+ main_token,
+ @intCast(u32, bad_index),
+ "expected '{{', found '{c}",
+ .{slice[bad_index]},
+ );
+ },
+ .expected_end => |bad_index| {
+ return astgen.failOff(
+ main_token,
+ @intCast(u32, bad_index),
+ "expected ending single quote ('), found '{c}",
+ .{slice[bad_index]},
+ );
+ },
+ .invalid_character => |bad_index| {
+ return astgen.failOff(
+ main_token,
+ @intCast(u32, bad_index),
+ "invalid byte in character literal: '{c}'",
+ .{slice[bad_index]},
+ );
+ },
+ }
}
fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: ast.Node.Index) InnerError!Zir.Inst.Ref {