From 24c432608f6b07020fa0b18fc9c868ad6abd9b15 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 1 Jul 2021 00:14:58 -0700 Subject: stage2: improve compile errors from tokenizer In order to not regress the quality of compile errors, some improvements had to be made. * std.zig.parseCharLiteral is improved to return more detailed parse failure information. * tokenizer is improved to handle null bytes in the middle of strings, character literals, and line comments. * validation of the number of unicode escape digits in string literals is moved to std.zig.parseStringLiteral rather than being handled in the tokenizer. * when a tokenizer error occurs, if the reported token has the 'invalid' tag, an error note is added to point to the invalid byte location. Further improvements would be: - Mention the expected set of allowed bytes at this location. - Display the invalid byte (if printable, print it, otherwise escape-print it). --- src/Module.zig | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/Module.zig') diff --git a/src/Module.zig b/src/Module.zig index d37452d99d..2e421ea65b 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -2466,6 +2466,7 @@ pub fn astGenFile(mod: *Module, file: *Scope.File) !void { defer msg.deinit(); const token_starts = file.tree.tokens.items(.start); + const token_tags = file.tree.tokens.items(.tag); try file.tree.renderError(parse_err, msg.writer()); const err_msg = try gpa.create(ErrorMsg); @@ -2477,6 +2478,14 @@ pub fn astGenFile(mod: *Module, file: *Scope.File) !void { }, .msg = msg.toOwnedSlice(), }; + if (token_tags[parse_err.token] == .invalid) { + const bad_off = @intCast(u32, file.tree.tokenSlice(parse_err.token).len); + try mod.errNoteNonLazy(.{ + .file_scope = file, + .parent_decl_node = 0, + .lazy = .{ .byte_abs = token_starts[parse_err.token] + bad_off }, + }, err_msg, "invalid byte here", .{}); + } { const lock = comp.mutex.acquire(); -- cgit v1.2.3