From 346ec15c5005e523c2a1d4b967ee7a4e5d1e9775 Mon Sep 17 00:00:00 2001 From: Tom Read Cutting Date: Sun, 19 Feb 2023 12:14:03 +0000 Subject: Correctly handle carriage return characters according to the spec (#12661) * Scan from line start when finding tag in tokenizer This resolves a crash that can occur for invalid bytes like carriage returns that are valid characters when not parsed from within literals. There are potentially other edge cases this could resolve as well, as the calling code for this function didn't account for any potential 'pending_invalid_tokens' that could be queued up by the tokenizer from within another state. * Fix carriage return crash in multiline string Follow the guidance of #38: > However CR directly before NL is interpreted as only a newline and not part of the multiline string. zig fmt will delete the CR. Zig fmt already had code for deleting carriage returns, but would still crash - now it no longer does so. Carriage returns encountered before line-feeds are now appropriately removed on program compilation as well. * Only accept carriage returns before line feeds The previous commit was much less strict about this; this one more closely matches the desired spec of only allowing CR characters as part of a CRLF pair, and not otherwise. * Fix CR being rejected when used as whitespace Missed this comment from ziglang/zig-spec#83: > CR used as whitespace, whether directly preceding NL or stray, is still unambiguously whitespace. It is accepted by the grammar and replaced by the canonical whitespace by zig fmt. 
* Add tests for carriage return handling --- test/compile_errors.zig | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'test/compile_errors.zig') diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 801d0464ea..21c8822eb3 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -174,6 +174,16 @@ pub fn addCases(ctx: *TestContext) !void { }); } + { + const case = ctx.obj("isolated carriage return in multiline string literal", .{}); + case.backend = .stage2; + + case.addError("const foo = \\\\\test\r\r rogue carriage return\n;", &[_][]const u8{ + ":1:19: error: expected ';' after declaration", + ":1:20: note: invalid byte: '\\r'", + }); + } + { const case = ctx.obj("missing semicolon at EOF", .{}); case.addError( -- cgit v1.2.3