diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2021-07-01 00:14:58 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2021-07-02 13:27:35 -0700 |
| commit | 24c432608f6b07020fa0b18fc9c868ad6abd9b15 (patch) | |
| tree | f9650d8c9aa36d6fddd45b1f70847304da16c0f5 /src/AstGen.zig | |
| parent | 3f680abbe2c4d2eeefd0eb73b8af25d1768e6ceb (diff) | |
| download | zig-24c432608f6b07020fa0b18fc9c868ad6abd9b15.tar.gz zig-24c432608f6b07020fa0b18fc9c868ad6abd9b15.zip | |
stage2: improve compile errors from tokenizer
In order to not regress the quality of compile errors, some improvements
had to be made.
* std.zig.parseCharLiteral is improved to return more detailed parse
failure information.
* tokenizer is improved to handle null bytes in the middle of strings,
character literals, and line comments.
* validation of the number of unicode escape digits in string literals is
moved to std.zig.parseStringLiteral rather than being handled in the tokenizer.
* when a tokenizer error occurs, if the reported token has the 'invalid'
tag, an error note is added to point to the invalid byte location.
Further improvements would be:
- Mention the expected set of allowed bytes at this location.
- Display the invalid byte (if printable, print it, otherwise
escape-print it).
Diffstat (limited to 'src/AstGen.zig')
| -rw-r--r-- | src/AstGen.zig | 74 |
1 files changed, 65 insertions, 9 deletions
diff --git a/src/AstGen.zig b/src/AstGen.zig index 64f5f012f0..5b9851b0ae 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -6380,20 +6380,76 @@ fn charLiteral(gz: *GenZir, rl: ResultLoc, node: ast.Node.Index) !Zir.Inst.Ref { const main_token = main_tokens[node]; const slice = tree.tokenSlice(main_token); - var bad_index: usize = undefined; - const value = std.zig.parseCharLiteral(slice, &bad_index) catch |err| switch (err) { - error.InvalidCharacter => { - const bad_byte = slice[bad_index]; + switch (std.zig.parseCharLiteral(slice)) { + .success => |codepoint| { + const result = try gz.addInt(codepoint); + return rvalue(gz, rl, result, node); + }, + .invalid_escape_character => |bad_index| { return astgen.failOff( main_token, @intCast(u32, bad_index), - "invalid character: '{c}'\n", - .{bad_byte}, + "invalid escape character: '{c}'", + .{slice[bad_index]}, ); }, - }; - const result = try gz.addInt(value); - return rvalue(gz, rl, result, node); + .expected_hex_digit => |bad_index| { + return astgen.failOff( + main_token, + @intCast(u32, bad_index), + "expected hex digit, found '{c}'", + .{slice[bad_index]}, + ); + }, + .empty_unicode_escape_sequence => |bad_index| { + return astgen.failOff( + main_token, + @intCast(u32, bad_index), + "empty unicode escape sequence", + .{}, + ); + }, + .expected_hex_digit_or_rbrace => |bad_index| { + return astgen.failOff( + main_token, + @intCast(u32, bad_index), + "expected hex digit or '}}', found '{c}'", + .{slice[bad_index]}, + ); + }, + .unicode_escape_overflow => |bad_index| { + return astgen.failOff( + main_token, + @intCast(u32, bad_index), + "unicode escape too large to be a valid codepoint", + .{}, + ); + }, + .expected_lbrace => |bad_index| { + return astgen.failOff( + main_token, + @intCast(u32, bad_index), + "expected '{{', found '{c}", + .{slice[bad_index]}, + ); + }, + .expected_end => |bad_index| { + return astgen.failOff( + main_token, + @intCast(u32, bad_index), + "expected ending single quote ('), found '{c}", 
+ .{slice[bad_index]}, + ); + }, + .invalid_character => |bad_index| { + return astgen.failOff( + main_token, + @intCast(u32, bad_index), + "invalid byte in character literal: '{c}'", + .{slice[bad_index]}, + ); + }, + } } fn integerLiteral(gz: *GenZir, rl: ResultLoc, node: ast.Node.Index) InnerError!Zir.Inst.Ref { |
