From fa57463bb9c09ee1e50012d7e50d120e1599fb81 Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 12 May 2020 21:44:08 +0300 Subject: make parser testError take a list of expected errors --- lib/std/zig/parser_test.zig | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index b98e8c69c3..749fd1e832 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -19,7 +19,9 @@ test "zig fmt: decl between fields" { \\ const baz1 = 2; \\ b: usize, \\}; - ); + , &[_]Error{ + .DeclBetweenFields, + }); } test "zig fmt: errdefer with payload" { @@ -2828,7 +2830,9 @@ test "zig fmt: extern without container keyword returns error" { try testError( \\const container = extern {}; \\ - ); + , &[_]Error{ + .ExpectedExpr, + }); } test "zig fmt: integer literals with underscore separators" { @@ -3030,9 +3034,17 @@ fn testTransform(source: []const u8, expected_source: []const u8) !void { fn testCanonical(source: []const u8) !void { return testTransform(source, source); } -fn testError(source: []const u8) !void { + +const Error = @TagType(std.zig.ast.Error); + +fn testError(source: []const u8, expected_errors: []const Error) !void { const tree = try std.zig.parse(std.testing.allocator, source); defer tree.deinit(); - std.testing.expect(tree.errors.len != 0); + std.testing.expect(tree.errors.len == expected_errors.len); + for (expected_errors) |expected, i| { + const err = tree.errors.at(i); + + std.testing.expect(expected == err.*); + } } -- cgit v1.2.3 From df22c7dfef7312474865c868a633c8e9bbaa63fa Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 12 May 2020 22:37:39 +0300 Subject: std.zig attempt to continue parsing on error --- lib/std/zig/parse.zig | 99 ++++++++++++++++++++++++++++++++++----------- lib/std/zig/parser_test.zig | 10 +++++ 2 files changed, 86 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 031fd9c160..eec1765002 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -48,31 +48,21 @@ pub fn parse(allocator: *Allocator, source: []const u8) Allocator.Error!*Tree { while (it.peek().?.id == .LineComment) _ = it.next(); - tree.root_node = parseRoot(arena, &it, tree) catch |err| blk: { - switch (err) { - error.ParseError => { - assert(tree.errors.len != 0); - break :blk undefined; - }, - error.OutOfMemory => { - return error.OutOfMemory; - }, - } - }; + tree.root_node = try parseRoot(arena, &it, tree); return tree; } /// Root <- skip ContainerMembers eof -fn parseRoot(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!*Node.Root { +fn parseRoot(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!*Node.Root { const node = try arena.create(Node.Root); node.* = .{ .decls = try parseContainerMembers(arena, it, tree), - .eof_token = eatToken(it, .Eof) orelse { + .eof_token = eatToken(it, .Eof) orelse blk: { try tree.errors.push(.{ .ExpectedContainerMembers = .{ .token = it.index }, }); - return error.ParseError; + break :blk undefined; }, }; return node; @@ -85,7 +75,7 @@ fn parseRoot(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!*Node.Roo /// / KEYWORD_pub? ContainerField COMMA ContainerMembers /// / KEYWORD_pub? 
ContainerField /// / -fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !Node.Root.DeclList { +fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!Node.Root.DeclList { var list = Node.Root.DeclList.init(arena); var field_state: union(enum) { @@ -108,7 +98,13 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !No const doc_comments = try parseDocComment(arena, it, tree); - if (try parseTestDecl(arena, it, tree)) |node| { + if (parseTestDecl(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => { + findEndOfBlock(it); + continue; + }, + }) |node| { if (field_state == .seen) { field_state = .{ .end = node.firstToken() }; } @@ -117,7 +113,13 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !No continue; } - if (try parseTopLevelComptime(arena, it, tree)) |node| { + if (parseTopLevelComptime(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => { + findEndOfBlock(it); + continue; + }, + }) |node| { if (field_state == .seen) { field_state = .{ .end = node.firstToken() }; } @@ -128,7 +130,15 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !No const visib_token = eatToken(it, .Keyword_pub); - if (try parseTopLevelDecl(arena, it, tree)) |node| { + if (parseTopLevelDecl(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => { + // attempt to recover by finding a semicolon + // TODO if this was a function with a body we should use findEndOfBlock + findToken(it, .Semicolon); + continue; + }, + }) |node| { if (field_state == .seen) { field_state = .{ .end = visib_token orelse node.firstToken() }; } @@ -163,10 +173,17 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !No try tree.errors.push(.{ .ExpectedPubItem = .{ .token = it.index }, }); - return error.ParseError; + // ignore this pub } - if (try parseContainerField(arena, it, tree)) |node| { + if (parseContainerField(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => { + // attempt to recover by finding a comma + findToken(it, .Comma); + continue; + }, + }) |node| { switch (field_state) { .none => field_state = .seen, .err, .seen => {}, @@ -200,8 +217,39 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !No return list; } +/// Attempts to find a closing brace, assumes the opening brace was found. +fn findEndOfBlock(it: *TokenIterator) void { + var count: u32 = 1; + while (it.next()) |tok| switch (tok.id) { + .LBrace => count += 1, + .RBrace => { + count -= 1; + if (count == 0) return; + }, + else => {}, + }; +} + +/// Attempts to find `wanted` token, keeps track of parentheses. 
+fn findToken(it: *TokenIterator, wanted: Token.Id) void { + var count: u32 = 0; + while (it.next()) |tok| switch (tok.id) { + .LParen, .LBracket, .LBrace => count += 1, + .RParen, .RBracket, .RBrace => { + if (count == 0) { + _ = it.prev(); + return; + } + count -= 1; + }, + else => { + if (tok.id == wanted and count == 0) return; + }, + }; +} + /// Eat a multiline container doc comment -fn parseContainerDocComments(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { +fn parseContainerDocComments(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!?*Node { var lines = Node.DocComment.LineList.init(arena); while (eatToken(it, .ContainerDocComment)) |line| { try lines.push(line); @@ -687,8 +735,13 @@ fn parseLoopStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Nod node.cast(Node.While).?.inline_token = inline_token; return node; } + if (inline_token == null) return null; - return null; + // If we've seen "inline", there should have been a "for" or "while" + try tree.errors.push(.{ + .ExpectedInlinable = .{ .token = it.index }, + }); + return error.ParseError; } /// ForStatement @@ -2925,7 +2978,7 @@ fn parseDocComment(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node.D } /// Eat a single-line doc comment on the same line as another node -fn parseAppendedDocComment(arena: *Allocator, it: *TokenIterator, tree: *Tree, after_token: TokenIndex) !?*Node.DocComment { +fn parseAppendedDocComment(arena: *Allocator, it: *TokenIterator, tree: *Tree, after_token: TokenIndex) Allocator.Error!?*Node.DocComment { const comment_token = eatToken(it, .DocComment) orelse return null; if (tree.tokensOnSameLine(after_token, comment_token)) { const node = try arena.create(Node.DocComment); diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 749fd1e832..a925aacc0b 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -1,3 +1,13 @@ +test "zig fmt: fault tolerant parsing" { + try testError( + \\test "" {inline} + \\test "" {inline} + , &[_]Error{ + .ExpectedInlinable, + .ExpectedInlinable, + }); +} + test "zig fmt: top-level fields" { try testCanonical( \\a: did_you_know, -- cgit v1.2.3 From 91358f3092fb2004bb46572d44a0e377ed400565 Mon Sep 17 00:00:00 2001 From: Vexu Date: Wed, 13 May 2020 16:51:23 +0300 Subject: continue parsing on extra qualifier errors --- lib/std/zig/parse.zig | 53 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index eec1765002..586af02194 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -226,6 +226,10 @@ fn findEndOfBlock(it: *TokenIterator) void { count -= 1; if (count == 0) return; }, + .Eof => { + _ = it.prev(); + return; + }, else => {}, }; } @@ -242,8 +246,12 @@ fn findToken(it: *TokenIterator, wanted: Token.Id) void { } count -= 1; }, + .Eof => { + _ = it.prev(); + return; + }, else => { - if (tok.id == wanted and count == 0) return; + if (tok.id == wanted and count == 0) return; }, }; } @@ -2324,7 +2332,7 @@ fn parsePrefixTypeOp(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node const node = try arena.create(Node.AnyFrameType); node.* = .{ .anyframe_token = token, - .result = Node.AnyFrameType.Result{ + .result = .{ .arrow_token = arrow, .return_type = undefined, // set by caller }, @@ -2365,6 +2373,13 @@ fn parsePrefixTypeOp(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node } else null; _ = try expectToken(it, tree, 
.RParen); + if (ptr_info.align_info != null) { + try tree.errors.push(.{ + .ExtraAlignQualifier = .{ .token = it.index - 1 }, + }); + continue; + } + ptr_info.align_info = Node.PrefixOp.PtrInfo.Align{ .node = expr_node, .bit_range = bit_range, @@ -2373,14 +2388,32 @@ fn parsePrefixTypeOp(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node continue; } if (eatToken(it, .Keyword_const)) |const_token| { + if (ptr_info.const_token != null) { + try tree.errors.push(.{ + .ExtraConstQualifier = .{ .token = it.index - 1 }, + }); + continue; + } ptr_info.const_token = const_token; continue; } if (eatToken(it, .Keyword_volatile)) |volatile_token| { + if (ptr_info.volatile_token != null) { + try tree.errors.push(.{ + .ExtraVolatileQualifier = .{ .token = it.index - 1 }, + }); + continue; + } ptr_info.volatile_token = volatile_token; continue; } if (eatToken(it, .Keyword_allowzero)) |allowzero_token| { + if (ptr_info.allowzero_token != null) { + try tree.errors.push(.{ + .ExtraAllowZeroQualifier = .{ .token = it.index - 1 }, + }); + continue; + } ptr_info.allowzero_token = allowzero_token; continue; } @@ -2399,9 +2432,9 @@ fn parsePrefixTypeOp(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node if (try parseByteAlign(arena, it, tree)) |align_expr| { if (slice_type.align_info != null) { try tree.errors.push(.{ - .ExtraAlignQualifier = .{ .token = it.index }, + .ExtraAlignQualifier = .{ .token = it.index - 1 }, }); - return error.ParseError; + continue; } slice_type.align_info = Node.PrefixOp.PtrInfo.Align{ .node = align_expr, @@ -2412,9 +2445,9 @@ fn parsePrefixTypeOp(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node if (eatToken(it, .Keyword_const)) |const_token| { if (slice_type.const_token != null) { try tree.errors.push(.{ - .ExtraConstQualifier = .{ .token = it.index }, + .ExtraConstQualifier = .{ .token = it.index - 1 }, }); - return error.ParseError; + continue; } slice_type.const_token = const_token; continue; @@ -2422,9 +2455,9 @@ fn parsePrefixTypeOp(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node if (eatToken(it, .Keyword_volatile)) |volatile_token| { if (slice_type.volatile_token != null) { try tree.errors.push(.{ - .ExtraVolatileQualifier = .{ .token = it.index }, + .ExtraVolatileQualifier = .{ .token = it.index - 1 }, }); - return error.ParseError; + continue; } slice_type.volatile_token = volatile_token; continue; @@ -2432,9 +2465,9 @@ fn parsePrefixTypeOp(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node if (eatToken(it, .Keyword_allowzero)) |allowzero_token| { if (slice_type.allowzero_token != null) { try tree.errors.push(.{ - .ExtraAllowZeroQualifier = .{ .token = it.index }, + .ExtraAllowZeroQualifier = .{ .token = it.index - 1 }, }); - return error.ParseError; + continue; } slice_type.allowzero_token = allowzero_token; continue; -- cgit v1.2.3 From be392777b763028a02866342b12915a1e69ebdf1 Mon Sep 17 00:00:00 2001 From: Vexu Date: Wed, 13 May 2020 17:21:27 +0300 Subject: continue parsing after missing commas and invalid statements --- lib/std/zig/ast.zig | 4 ++++ lib/std/zig/parse.zig | 28 ++++++++++++++++++++++++++-- lib/std/zig/parser_test.zig | 24 ++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig index 8dec2de15c..dd9abe61ac 100644 --- a/lib/std/zig/ast.zig +++ b/lib/std/zig/ast.zig @@ -165,6 +165,7 @@ pub const Error = union(enum) { ExpectedDerefOrUnwrap: ExpectedDerefOrUnwrap, ExpectedSuffixOp: ExpectedSuffixOp, DeclBetweenFields: 
DeclBetweenFields, + MissingComma: MissingComma, pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void { switch (self.*) { @@ -213,6 +214,7 @@ pub const Error = union(enum) { .ExpectedDerefOrUnwrap => |*x| return x.render(tokens, stream), .ExpectedSuffixOp => |*x| return x.render(tokens, stream), .DeclBetweenFields => |*x| return x.render(tokens, stream), + .MissingComma => |*x| return x.render(tokens, stream), } } @@ -263,6 +265,7 @@ pub const Error = union(enum) { .ExpectedDerefOrUnwrap => |x| return x.token, .ExpectedSuffixOp => |x| return x.token, .DeclBetweenFields => |x| return x.token, + .MissingComma => |x| return x.token, } } @@ -308,6 +311,7 @@ pub const Error = union(enum) { pub const ExtraVolatileQualifier = SimpleError("Extra volatile qualifier"); pub const ExtraAllowZeroQualifier = SimpleError("Extra allowzero qualifier"); pub const DeclBetweenFields = SimpleError("Declarations are not allowed between container fields"); + pub const MissingComma = SimpleError("Expected comma between items"); pub const ExpectedCall = struct { node: *Node, diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 586af02194..ad4f5e1242 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -1083,7 +1083,14 @@ fn parseBlock(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { var statements = Node.Block.StatementList.init(arena); while (true) { - const statement = (try parseStatement(arena, it, tree)) orelse break; + const statement = (parseStatement(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => { + // try to skip to the next statement + findToken(it, .Semicolon); + continue; + }, + }) orelse break; try statements.push(statement); } @@ -2816,7 +2823,24 @@ fn ListParseFn(comptime L: type, comptime nodeParseFn: var) ParseFn(L) { var list = L.init(arena); while (try nodeParseFn(arena, it, tree)) |node| { try list.push(node); - if (eatToken(it, .Comma) == null) break; + + const token = nextToken(it); + switch (token.ptr.id) { + .Comma => {}, + // all possible delimiters + .Colon, .RParen, .RBrace, .RBracket => { + putBackToken(it, token.index); + break; + }, + else => { + // this is likely just a missing comma, + // continue parsing this list and give an error + try tree.errors.push(.{ + .MissingComma = .{ .token = token.index }, + }); + putBackToken(it, token.index); + }, + } } return list; } diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index a925aacc0b..2c8b53f7f0 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -6,6 +6,30 @@ test "zig fmt: fault tolerant parsing" { .ExpectedInlinable, .ExpectedInlinable, }); + try testError( + \\test "" { + \\ foo + +; + \\ inline; + \\} + , &[_]Error{ + .InvalidToken, + .ExpectedInlinable, + }); + try testError( + \\test "" { + \\ switch (foo) { + \\ 2 => {} + \\ 3 => {} + \\ else => { + \\ inline; + \\ } + \\ } + \\} + , &[_]Error{ + .MissingComma, + .MissingComma, + .ExpectedInlinable, + }); } test "zig fmt: top-level fields" { -- cgit v1.2.3 From cefc04348e79f85118ad3c7b7fd15a4a5d635d50 Mon Sep 17 00:00:00 2001 From: Vexu Date: Wed, 13 May 2020 17:36:06 +0300 Subject: continue parsing on invalid and token --- lib/std/zig/ast.zig | 7 ++++--- lib/std/zig/parse.zig | 25 +++++++++++++++---------- lib/std/zig/parser_test.zig | 5 +++-- 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig index dd9abe61ac..339927d636 100644 
--- a/lib/std/zig/ast.zig +++ b/lib/std/zig/ast.zig @@ -166,6 +166,7 @@ pub const Error = union(enum) { ExpectedSuffixOp: ExpectedSuffixOp, DeclBetweenFields: DeclBetweenFields, MissingComma: MissingComma, + InvalidAnd: InvalidAnd, pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void { switch (self.*) { @@ -215,6 +216,7 @@ pub const Error = union(enum) { .ExpectedSuffixOp => |*x| return x.render(tokens, stream), .DeclBetweenFields => |*x| return x.render(tokens, stream), .MissingComma => |*x| return x.render(tokens, stream), + .InvalidAnd => |*x| return x.render(tokens, stream), } } @@ -266,6 +268,7 @@ pub const Error = union(enum) { .ExpectedSuffixOp => |x| return x.token, .DeclBetweenFields => |x| return x.token, .MissingComma => |x| return x.token, + .InvalidAnd => |x| return x.token, } } @@ -312,6 +315,7 @@ pub const Error = union(enum) { pub const ExtraAllowZeroQualifier = SimpleError("Extra allowzero qualifier"); pub const DeclBetweenFields = SimpleError("Declarations are not allowed between container fields"); pub const MissingComma = SimpleError("Expected comma between items"); + pub const InvalidAnd = SimpleError("`&&` is invalid. Note that `and` is boolean AND."); pub const ExpectedCall = struct { node: *Node, @@ -339,9 +343,6 @@ pub const Error = union(enum) { pub fn render(self: *const ExpectedToken, tokens: *Tree.TokenList, stream: var) !void { const found_token = tokens.at(self.token); switch (found_token.id) { - .Invalid_ampersands => { - return stream.print("`&&` is invalid. Note that `and` is boolean AND.", .{}); - }, .Invalid => { return stream.print("expected '{}', found invalid bytes", .{self.expected_id.symbol()}); }, diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index ad4f5e1242..af501cd8a4 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -2824,21 +2824,16 @@ fn ListParseFn(comptime L: type, comptime nodeParseFn: var) ParseFn(L) { while (try nodeParseFn(arena, it, tree)) |node| { try list.push(node); - const token = nextToken(it); - switch (token.ptr.id) { - .Comma => {}, + switch (it.peek().?.id) { + .Comma => _ = nextToken(it), // all possible delimiters - .Colon, .RParen, .RBrace, .RBracket => { - putBackToken(it, token.index); - break; - }, + .Colon, .RParen, .RBrace, .RBracket => break, else => { // this is likely just a missing comma, // continue parsing this list and give an error try tree.errors.push(.{ - .MissingComma = .{ .token = token.index }, + .MissingComma = .{ .token = it.index }, }); - putBackToken(it, token.index); }, } } @@ -2850,7 +2845,17 @@ fn ListParseFn(comptime L: type, comptime nodeParseFn: var) ParseFn(L) { fn SimpleBinOpParseFn(comptime token: Token.Id, comptime op: Node.InfixOp.Op) NodeParseFn { return struct { pub fn parse(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!?*Node { - const op_token = eatToken(it, token) orelse return null; + const op_token = if (token == .Keyword_and) switch (it.peek().?.id) { + .Keyword_and => nextToken(it).index, + .Invalid_ampersands => blk: { + try tree.errors.push(.{ + .InvalidAnd = .{ .token = it.index }, + }); + break :blk nextToken(it).index; + }, + else => return null, + } else eatToken(it, token) orelse return null; + const node = try arena.create(Node.InfixOp); node.* = .{ .op_token = op_token, diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 2c8b53f7f0..b92ce1ea23 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -21,14 +21,15 @@ test "zig fmt: fault tolerant parsing" { \\ 2 => 
{} \\ 3 => {} \\ else => { - \\ inline; + \\ foo && bar +; \\ } \\ } \\} , &[_]Error{ .MissingComma, .MissingComma, - .ExpectedInlinable, + .InvalidAnd, + .InvalidToken, }); } -- cgit v1.2.3 From 23c5ff94e9dda07694762fcc829dbac006bb00a1 Mon Sep 17 00:00:00 2001 From: Vexu Date: Wed, 13 May 2020 20:42:18 +0300 Subject: improve recovery on top level declarations --- lib/std/zig/ast.zig | 2 + lib/std/zig/parse.zig | 157 +++++++++++++++++++++++++------------------- lib/std/zig/parser_test.zig | 31 ++++++++- lib/std/zig/render.zig | 6 +- 4 files changed, 127 insertions(+), 69 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig index 339927d636..afb7fce23e 100644 --- a/lib/std/zig/ast.zig +++ b/lib/std/zig/ast.zig @@ -893,6 +893,7 @@ pub const Node = struct { pub const ReturnType = union(enum) { Explicit: *Node, InferErrorSet: *Node, + Invalid, }; pub fn iterate(self: *FnProto, index: usize) ?*Node { @@ -942,6 +943,7 @@ pub const Node = struct { if (self.body_node) |body_node| return body_node.lastToken(); switch (self.return_type) { .Explicit, .InferErrorSet => |node| return node.lastToken(), + .Invalid => unreachable, } } }; diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index af501cd8a4..6052d92d6a 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -130,15 +130,7 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All const visib_token = eatToken(it, .Keyword_pub); - if (parseTopLevelDecl(arena, it, tree) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.ParseError => { - // attempt to recover by finding a semicolon - // TODO if this was a function with a body we should use findEndOfBlock - findToken(it, .Semicolon); - continue; - }, - }) |node| { + if (try parseTopLevelDecl(arena, it, tree)) |node| { if (field_state == .seen) { field_state = .{ .end = visib_token orelse node.firstToken() }; } @@ -217,43 +209,49 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All return list; } -/// Attempts to find a closing brace, assumes the opening brace was found. +/// Attempts to find a closing brace. fn findEndOfBlock(it: *TokenIterator) void { - var count: u32 = 1; - while (it.next()) |tok| switch (tok.id) { - .LBrace => count += 1, - .RBrace => { - count -= 1; - if (count == 0) return; - }, - .Eof => { - _ = it.prev(); - return; - }, - else => {}, - }; + var count: u32 = 0; + while (true) { + const tok = nextToken(it); + switch (tok.ptr.id) { + .LBrace => count += 1, + .RBrace => { + if (count <= 1) return; + count -= 1; + }, + .Eof => { + putBackToken(it, tok.index); + return; + }, + else => {}, + } + } } /// Attempts to find `wanted` token, keeps track of parentheses. 
fn findToken(it: *TokenIterator, wanted: Token.Id) void { var count: u32 = 0; - while (it.next()) |tok| switch (tok.id) { - .LParen, .LBracket, .LBrace => count += 1, - .RParen, .RBracket, .RBrace => { - if (count == 0) { - _ = it.prev(); + while (true) { + const tok = nextToken(it); + switch (tok.ptr.id) { + .LParen, .LBracket, .LBrace => count += 1, + .RParen, .RBracket, .RBrace => { + if (count == 0) { + putBackToken(it, tok.index); + return; + } + count -= 1; + }, + .Eof => { + putBackToken(it, tok.index); return; - } - count -= 1; - }, - .Eof => { - _ = it.prev(); - return; - }, - else => { - if (tok.id == wanted and count == 0) return; - }, - }; + }, + else => { + if (tok.ptr.id == wanted and count == 0) return; + }, + } + } } /// Eat a multiline container doc comment @@ -317,7 +315,7 @@ fn parseTopLevelComptime(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?* /// <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block) /// / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? VarDecl /// / KEYWORD_usingnamespace Expr SEMICOLON -fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { +fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!?*Node { var lib_name: ?*Node = null; const extern_export_inline_token = blk: { if (eatToken(it, .Keyword_export)) |token| break :blk token; @@ -330,12 +328,26 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node break :blk null; }; - if (try parseFnProto(arena, it, tree)) |node| { + if (parseFnProto(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => { + // this fn will likely have a body so we + // use findEndOfBlock instead of findToken. + findEndOfBlock(it); + return null; + }, + }) |node| { const fn_node = node.cast(Node.FnProto).?; fn_node.*.extern_export_inline_token = extern_export_inline_token; fn_node.*.lib_name = lib_name; if (eatToken(it, .Semicolon)) |_| return node; - if (try parseBlock(arena, it, tree)) |body_node| { + if (parseBlock(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + // since parseBlock only return error.ParseError on + // a missing '}' we can assume this function was + // supposed to end here. 
+ error.ParseError => null, + }) |body_node| { fn_node.body_node = body_node; return node; } @@ -356,7 +368,14 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node const thread_local_token = eatToken(it, .Keyword_threadlocal); - if (try parseVarDecl(arena, it, tree)) |node| { + if (parseVarDecl(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => { + // try to skip to next decl + findToken(it, .Semicolon); + return null; + }, + }) |node| { var var_decl = node.cast(Node.VarDecl).?; var_decl.*.thread_local_token = thread_local_token; var_decl.*.comptime_token = null; @@ -369,7 +388,8 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node try tree.errors.push(.{ .ExpectedVarDecl = .{ .token = it.index }, }); - return error.ParseError; + // ignore this, try to find next decl by skipping the next block + findEndOfBlock(it); } if (extern_export_inline_token) |token| { @@ -379,16 +399,14 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node return null; } - const use_node = (try parseUse(arena, it, tree)) orelse return null; - const expr_node = try expectNode(arena, it, tree, parseExpr, .{ - .ExpectedExpr = .{ .token = it.index }, - }); - const semicolon_token = try expectToken(it, tree, .Semicolon); - const use_node_raw = use_node.cast(Node.Use).?; - use_node_raw.*.expr = expr_node; - use_node_raw.*.semicolon_token = semicolon_token; - - return use_node; + return parseUse(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => { + // try to skip to next decl + findToken(it, .Semicolon); + return null; + }, + }; } /// FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? LinkSection? EXCLAMATIONMARK? (KEYWORD_var / TypeExpr) @@ -422,18 +440,23 @@ fn parseFnProto(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { const exclamation_token = eatToken(it, .Bang); const return_type_expr = (try parseVarType(arena, it, tree)) orelse - try expectNode(arena, it, tree, parseTypeExpr, .{ - .ExpectedReturnType = .{ .token = it.index }, - }); + (try parseTypeExpr(arena, it, tree)) orelse blk: { + try tree.errors.push(.{ + .ExpectedReturnType = .{ .token = it.index }, + }); + // most likely the user forgot to specify the return type. + // Mark return type as invalid and try to continue. + break :blk null; + }; - const return_type: Node.FnProto.ReturnType = if (exclamation_token != null) - .{ - .InferErrorSet = return_type_expr, - } + // TODO https://github.com/ziglang/zig/issues/3750 + const R = Node.FnProto.ReturnType; + const return_type = if (return_type_expr == null) + R{ .Invalid = {} } + else if (exclamation_token != null) + R{ .InferErrorSet = return_type_expr.? } else - .{ - .Explicit = return_type_expr, - }; + R{ .Explicit = return_type_expr.? 
}; const var_args_token = if (params.len > 0) params.at(params.len - 1).*.cast(Node.ParamDecl).?.var_args_token @@ -2992,8 +3015,10 @@ fn parseUse(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { .doc_comments = null, .visib_token = null, .use_token = token, - .expr = undefined, // set by caller - .semicolon_token = undefined, // set by caller + .expr = try expectNode(arena, it, tree, parseExpr, .{ + .ExpectedExpr = .{ .token = it.index }, + }), + .semicolon_token = try expectToken(it, tree, .Semicolon), }; return &node.base; } diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index b92ce1ea23..e3bc20bd10 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -1,4 +1,4 @@ -test "zig fmt: fault tolerant parsing" { +test "recovery: top level" { try testError( \\test "" {inline} \\test "" {inline} @@ -6,6 +6,9 @@ test "zig fmt: fault tolerant parsing" { .ExpectedInlinable, .ExpectedInlinable, }); +} + +test "recovery: block statements" { try testError( \\test "" { \\ foo + +; @@ -15,6 +18,9 @@ test "zig fmt: fault tolerant parsing" { .InvalidToken, .ExpectedInlinable, }); +} + +test "recovery: missing comma" { try testError( \\test "" { \\ switch (foo) { @@ -33,6 +39,29 @@ test "zig fmt: fault tolerant parsing" { }); } +test "recovery: extra qualifier" { + try testError( + \\const a: *const const u8; + \\test "" + , &[_]Error{ + .ExtraConstQualifier, + .ExpectedLBrace, + }); +} + +test "recovery: missing return type" { + try testError( + \\fn foo() { + \\ a && b; + \\} + \\test "" + , &[_]Error{ + .ExpectedReturnType, + .InvalidAnd, + .ExpectedLBrace, + }); +} + test "zig fmt: top-level fields" { try testCanonical( \\a: did_you_know, diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig index bcdc08d43a..64f2f77f6c 100644 --- a/lib/std/zig/render.zig +++ b/lib/std/zig/render.zig @@ -1444,6 +1444,7 @@ fn renderExpression( else switch (fn_proto.return_type) { .Explicit => |node| node.firstToken(), .InferErrorSet => |node| tree.prevToken(node.firstToken()), + .Invalid => unreachable, }); assert(tree.tokens.at(rparen).id == .RParen); @@ -1518,13 +1519,14 @@ fn renderExpression( } switch (fn_proto.return_type) { - ast.Node.FnProto.ReturnType.Explicit => |node| { + .Explicit => |node| { return renderExpression(allocator, stream, tree, indent, start_col, node, space); }, - ast.Node.FnProto.ReturnType.InferErrorSet => |node| { + .InferErrorSet => |node| { try renderToken(tree, stream, tree.prevToken(node.firstToken()), indent, start_col, Space.None); // ! 
return renderExpression(allocator, stream, tree, indent, start_col, node, space); }, + .Invalid => unreachable, } }, -- cgit v1.2.3 From 2296906e2ad54c387b4b19784148c47a26969cdc Mon Sep 17 00:00:00 2001 From: Vexu Date: Wed, 13 May 2020 23:08:42 +0300 Subject: modernize std.zig.tokenizer --- lib/std/zig/tokenizer.zig | 954 +++++++++++++++++++++++----------------------- 1 file changed, 477 insertions(+), 477 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index 089711871e..160530f459 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -353,64 +353,64 @@ pub const Tokenizer = struct { } const State = enum { - Start, - Identifier, - Builtin, - StringLiteral, - StringLiteralBackslash, - MultilineStringLiteralLine, - CharLiteral, - CharLiteralBackslash, - CharLiteralHexEscape, - CharLiteralUnicodeEscapeSawU, - CharLiteralUnicodeEscape, - CharLiteralUnicodeInvalid, - CharLiteralUnicode, - CharLiteralEnd, - Backslash, - Equal, - Bang, - Pipe, - Minus, - MinusPercent, - Asterisk, - AsteriskPercent, - Slash, - LineCommentStart, - LineComment, - DocCommentStart, - DocComment, - ContainerDocComment, - Zero, - IntegerLiteralDec, - IntegerLiteralDecNoUnderscore, - IntegerLiteralBin, - IntegerLiteralBinNoUnderscore, - IntegerLiteralOct, - IntegerLiteralOctNoUnderscore, - IntegerLiteralHex, - IntegerLiteralHexNoUnderscore, - NumberDotDec, - NumberDotHex, - FloatFractionDec, - FloatFractionDecNoUnderscore, - FloatFractionHex, - FloatFractionHexNoUnderscore, - FloatExponentUnsigned, - FloatExponentNumber, - FloatExponentNumberNoUnderscore, - Ampersand, - Caret, - Percent, - Plus, - PlusPercent, - AngleBracketLeft, - AngleBracketAngleBracketLeft, - AngleBracketRight, - AngleBracketAngleBracketRight, - Period, - Period2, - SawAtSign, + start, + identifier, + builtin, + string_literal, + string_literal_backslash, + multiline_string_literal_line, + char_literal, + char_literal_backslash, + char_literal_hex_escape, + char_literal_unicode_escape_saw_u, + char_literal_unicode_escape, + char_literal_unicode_invalid, + char_literal_unicode, + char_literal_end, + backslash, + equal, + bang, + pipe, + minus, + minus_percent, + asterisk, + asterisk_percent, + slash, + line_comment_start, + line_comment, + doc_comment_start, + doc_comment, + container_doc_comment, + zero, + int_literal_dec, + int_literal_dec_no_underscore, + int_literal_bin, + int_literal_bin_no_underscore, + int_literal_oct, + int_literal_oct_no_underscore, + int_literal_hex, + int_literal_hex_no_underscore, + num_dot_dec, + num_dot_hex, + float_fraction_dec, + float_fraction_dec_no_underscore, + float_fraction_hex, + float_fraction_hex_no_underscore, + float_exponent_unsigned, + float_exponent_num, + float_exponent_num_no_underscore, + ampersand, + caret, + percent, + plus, + plus_percent, + angle_bracket_left, + angle_bracket_angle_bracket_left, + angle_bracket_right, + angle_bracket_angle_bracket_right, + period, + period_2, + saw_at_sign, }; fn isIdentifierChar(char: u8) bool { @@ -423,9 +423,9 @@ pub const Tokenizer = struct { return token; } const start_index = self.index; - var state = State.Start; + var state: State = .start; var result = Token{ - .id = Token.Id.Eof, + .id = .Eof, .start = self.index, .end = undefined, }; @@ -434,40 +434,40 @@ pub const Tokenizer = struct { while (self.index < self.buffer.len) : (self.index += 1) { const c = self.buffer[self.index]; switch (state) { - State.Start => switch (c) { + .start => switch (c) { ' ', '\n', '\t', '\r' => { 
result.start = self.index + 1; }, '"' => { - state = State.StringLiteral; - result.id = Token.Id.StringLiteral; + state = .string_literal; + result.id = .StringLiteral; }, '\'' => { - state = State.CharLiteral; + state = .char_literal; }, 'a'...'z', 'A'...'Z', '_' => { - state = State.Identifier; - result.id = Token.Id.Identifier; + state = .identifier; + result.id = .Identifier; }, '@' => { - state = State.SawAtSign; + state = .saw_at_sign; }, '=' => { - state = State.Equal; + state = .equal; }, '!' => { - state = State.Bang; + state = .bang; }, '|' => { - state = State.Pipe; + state = .pipe; }, '(' => { - result.id = Token.Id.LParen; + result.id = .LParen; self.index += 1; break; }, ')' => { - result.id = Token.Id.RParen; + result.id = .RParen; self.index += 1; break; }, @@ -477,213 +477,213 @@ pub const Tokenizer = struct { break; }, ']' => { - result.id = Token.Id.RBracket; + result.id = .RBracket; self.index += 1; break; }, ';' => { - result.id = Token.Id.Semicolon; + result.id = .Semicolon; self.index += 1; break; }, ',' => { - result.id = Token.Id.Comma; + result.id = .Comma; self.index += 1; break; }, '?' => { - result.id = Token.Id.QuestionMark; + result.id = .QuestionMark; self.index += 1; break; }, ':' => { - result.id = Token.Id.Colon; + result.id = .Colon; self.index += 1; break; }, '%' => { - state = State.Percent; + state = .percent; }, '*' => { - state = State.Asterisk; + state = .asterisk; }, '+' => { - state = State.Plus; + state = .plus; }, '<' => { - state = State.AngleBracketLeft; + state = .angle_bracket_left; }, '>' => { - state = State.AngleBracketRight; + state = .angle_bracket_right; }, '^' => { - state = State.Caret; + state = .caret; }, '\\' => { - state = State.Backslash; - result.id = Token.Id.MultilineStringLiteralLine; + state = .backslash; + result.id = .MultilineStringLiteralLine; }, '{' => { - result.id = Token.Id.LBrace; + result.id = .LBrace; self.index += 1; break; }, '}' => { - result.id = Token.Id.RBrace; + result.id = .RBrace; self.index += 1; break; }, '~' => { - result.id = Token.Id.Tilde; + result.id = .Tilde; self.index += 1; break; }, '.' 
=> { - state = State.Period; + state = .period; }, '-' => { - state = State.Minus; + state = .minus; }, '/' => { - state = State.Slash; + state = .slash; }, '&' => { - state = State.Ampersand; + state = .ampersand; }, '0' => { - state = State.Zero; - result.id = Token.Id.IntegerLiteral; + state = .zero; + result.id = .IntegerLiteral; }, '1'...'9' => { - state = State.IntegerLiteralDec; - result.id = Token.Id.IntegerLiteral; + state = .int_literal_dec; + result.id = .IntegerLiteral; }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; self.index += 1; break; }, }, - State.SawAtSign => switch (c) { + .saw_at_sign => switch (c) { '"' => { - result.id = Token.Id.Identifier; - state = State.StringLiteral; + result.id = .Identifier; + state = .string_literal; }, else => { // reinterpret as a builtin self.index -= 1; - state = State.Builtin; - result.id = Token.Id.Builtin; + state = .builtin; + result.id = .Builtin; }, }, - State.Ampersand => switch (c) { + .ampersand => switch (c) { '&' => { - result.id = Token.Id.Invalid_ampersands; + result.id = .Invalid_ampersands; self.index += 1; break; }, '=' => { - result.id = Token.Id.AmpersandEqual; + result.id = .AmpersandEqual; self.index += 1; break; }, else => { - result.id = Token.Id.Ampersand; + result.id = .Ampersand; break; }, }, - State.Asterisk => switch (c) { + .asterisk => switch (c) { '=' => { - result.id = Token.Id.AsteriskEqual; + result.id = .AsteriskEqual; self.index += 1; break; }, '*' => { - result.id = Token.Id.AsteriskAsterisk; + result.id = .AsteriskAsterisk; self.index += 1; break; }, '%' => { - state = State.AsteriskPercent; + state = .asterisk_percent; }, else => { - result.id = Token.Id.Asterisk; + result.id = .Asterisk; break; }, }, - State.AsteriskPercent => switch (c) { + .asterisk_percent => switch (c) { '=' => { - result.id = Token.Id.AsteriskPercentEqual; + result.id = .AsteriskPercentEqual; self.index += 1; break; }, else => { - result.id = Token.Id.AsteriskPercent; + result.id = .AsteriskPercent; break; }, }, - State.Percent => switch (c) { + .percent => switch (c) { '=' => { - result.id = Token.Id.PercentEqual; + result.id = .PercentEqual; self.index += 1; break; }, else => { - result.id = Token.Id.Percent; + result.id = .Percent; break; }, }, - State.Plus => switch (c) { + .plus => switch (c) { '=' => { - result.id = Token.Id.PlusEqual; + result.id = .PlusEqual; self.index += 1; break; }, '+' => { - result.id = Token.Id.PlusPlus; + result.id = .PlusPlus; self.index += 1; break; }, '%' => { - state = State.PlusPercent; + state = .plus_percent; }, else => { - result.id = Token.Id.Plus; + result.id = .Plus; break; }, }, - State.PlusPercent => switch (c) { + .plus_percent => switch (c) { '=' => { - result.id = Token.Id.PlusPercentEqual; + result.id = .PlusPercentEqual; self.index += 1; break; }, else => { - result.id = Token.Id.PlusPercent; + result.id = .PlusPercent; break; }, }, - State.Caret => switch (c) { + .caret => switch (c) { '=' => { - result.id = Token.Id.CaretEqual; + result.id = .CaretEqual; self.index += 1; break; }, else => { - result.id = Token.Id.Caret; + result.id = .Caret; break; }, }, - State.Identifier => switch (c) { + .identifier => switch (c) { 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, else => { if (Token.getKeyword(self.buffer[result.start..self.index])) |id| { @@ -692,19 +692,19 @@ pub const Tokenizer = struct { break; }, }, - State.Builtin => switch (c) { + .builtin => switch (c) { 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, else => break, }, - State.Backslash => switch (c) { + 
.backslash => switch (c) { '\\' => { - state = State.MultilineStringLiteralLine; + state = .multiline_string_literal_line; }, else => break, }, - State.StringLiteral => switch (c) { + .string_literal => switch (c) { '\\' => { - state = State.StringLiteralBackslash; + state = .string_literal_backslash; }, '"' => { self.index += 1; @@ -714,98 +714,98 @@ pub const Tokenizer = struct { else => self.checkLiteralCharacter(), }, - State.StringLiteralBackslash => switch (c) { + .string_literal_backslash => switch (c) { '\n', '\r' => break, // Look for this error later. else => { - state = State.StringLiteral; + state = .string_literal; }, }, - State.CharLiteral => switch (c) { + .char_literal => switch (c) { '\\' => { - state = State.CharLiteralBackslash; + state = .char_literal_backslash; }, '\'', 0x80...0xbf, 0xf8...0xff => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, 0xc0...0xdf => { // 110xxxxx remaining_code_units = 1; - state = State.CharLiteralUnicode; + state = .char_literal_unicode; }, 0xe0...0xef => { // 1110xxxx remaining_code_units = 2; - state = State.CharLiteralUnicode; + state = .char_literal_unicode; }, 0xf0...0xf7 => { // 11110xxx remaining_code_units = 3; - state = State.CharLiteralUnicode; + state = .char_literal_unicode; }, else => { - state = State.CharLiteralEnd; + state = .char_literal_end; }, }, - State.CharLiteralBackslash => switch (c) { + .char_literal_backslash => switch (c) { '\n' => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, 'x' => { - state = State.CharLiteralHexEscape; + state = .char_literal_hex_escape; seen_escape_digits = 0; }, 'u' => { - state = State.CharLiteralUnicodeEscapeSawU; + state = .char_literal_unicode_escape_saw_u; }, else => { - state = State.CharLiteralEnd; + state = .char_literal_end; }, }, - State.CharLiteralHexEscape => switch (c) { + .char_literal_hex_escape => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => { seen_escape_digits += 1; if (seen_escape_digits == 2) { - state = State.CharLiteralEnd; + state = .char_literal_end; } }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.CharLiteralUnicodeEscapeSawU => switch (c) { + .char_literal_unicode_escape_saw_u => switch (c) { '{' => { - state = State.CharLiteralUnicodeEscape; + state = .char_literal_unicode_escape; seen_escape_digits = 0; }, else => { - result.id = Token.Id.Invalid; - state = State.CharLiteralUnicodeInvalid; + result.id = .Invalid; + state = .char_literal_unicode_invalid; }, }, - State.CharLiteralUnicodeEscape => switch (c) { + .char_literal_unicode_escape => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => { seen_escape_digits += 1; }, '}' => { if (seen_escape_digits == 0) { - result.id = Token.Id.Invalid; - state = State.CharLiteralUnicodeInvalid; + result.id = .Invalid; + state = .char_literal_unicode_invalid; } else { - state = State.CharLiteralEnd; + state = .char_literal_end; } }, else => { - result.id = Token.Id.Invalid; - state = State.CharLiteralUnicodeInvalid; + result.id = .Invalid; + state = .char_literal_unicode_invalid; }, }, - State.CharLiteralUnicodeInvalid => switch (c) { + .char_literal_unicode_invalid => switch (c) { // Keep consuming characters until an obvious stopping point. // This consolidates e.g. 
`u{0ab1Q}` into a single invalid token // instead of creating the tokens `u{0ab1`, `Q`, `}` @@ -813,32 +813,32 @@ pub const Tokenizer = struct { else => break, }, - State.CharLiteralEnd => switch (c) { + .char_literal_end => switch (c) { '\'' => { - result.id = Token.Id.CharLiteral; + result.id = .CharLiteral; self.index += 1; break; }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.CharLiteralUnicode => switch (c) { + .char_literal_unicode => switch (c) { 0x80...0xbf => { remaining_code_units -= 1; if (remaining_code_units == 0) { - state = State.CharLiteralEnd; + state = .char_literal_end; } }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.MultilineStringLiteralLine => switch (c) { + .multiline_string_literal_line => switch (c) { '\n' => { self.index += 1; break; @@ -847,449 +847,449 @@ pub const Tokenizer = struct { else => self.checkLiteralCharacter(), }, - State.Bang => switch (c) { + .bang => switch (c) { '=' => { - result.id = Token.Id.BangEqual; + result.id = .BangEqual; self.index += 1; break; }, else => { - result.id = Token.Id.Bang; + result.id = .Bang; break; }, }, - State.Pipe => switch (c) { + .pipe => switch (c) { '=' => { - result.id = Token.Id.PipeEqual; + result.id = .PipeEqual; self.index += 1; break; }, '|' => { - result.id = Token.Id.PipePipe; + result.id = .PipePipe; self.index += 1; break; }, else => { - result.id = Token.Id.Pipe; + result.id = .Pipe; break; }, }, - State.Equal => switch (c) { + .equal => switch (c) { '=' => { - result.id = Token.Id.EqualEqual; + result.id = .EqualEqual; self.index += 1; break; }, '>' => { - result.id = Token.Id.EqualAngleBracketRight; + result.id = .EqualAngleBracketRight; self.index += 1; break; }, else => { - result.id = Token.Id.Equal; + result.id = .Equal; break; }, }, - State.Minus => switch (c) { + .minus => switch (c) { '>' => { - result.id = Token.Id.Arrow; + result.id = .Arrow; self.index += 1; break; }, '=' => { - result.id = Token.Id.MinusEqual; + result.id = .MinusEqual; self.index += 1; break; }, '%' => { - state = State.MinusPercent; + state = .minus_percent; }, else => { - result.id = Token.Id.Minus; + result.id = .Minus; break; }, }, - State.MinusPercent => switch (c) { + .minus_percent => switch (c) { '=' => { - result.id = Token.Id.MinusPercentEqual; + result.id = .MinusPercentEqual; self.index += 1; break; }, else => { - result.id = Token.Id.MinusPercent; + result.id = .MinusPercent; break; }, }, - State.AngleBracketLeft => switch (c) { + .angle_bracket_left => switch (c) { '<' => { - state = State.AngleBracketAngleBracketLeft; + state = .angle_bracket_angle_bracket_left; }, '=' => { - result.id = Token.Id.AngleBracketLeftEqual; + result.id = .AngleBracketLeftEqual; self.index += 1; break; }, else => { - result.id = Token.Id.AngleBracketLeft; + result.id = .AngleBracketLeft; break; }, }, - State.AngleBracketAngleBracketLeft => switch (c) { + .angle_bracket_angle_bracket_left => switch (c) { '=' => { - result.id = Token.Id.AngleBracketAngleBracketLeftEqual; + result.id = .AngleBracketAngleBracketLeftEqual; self.index += 1; break; }, else => { - result.id = Token.Id.AngleBracketAngleBracketLeft; + result.id = .AngleBracketAngleBracketLeft; break; }, }, - State.AngleBracketRight => switch (c) { + .angle_bracket_right => switch (c) { '>' => { - state = State.AngleBracketAngleBracketRight; + state = .angle_bracket_angle_bracket_right; }, '=' => { - result.id = Token.Id.AngleBracketRightEqual; + result.id = .AngleBracketRightEqual; 
self.index += 1; break; }, else => { - result.id = Token.Id.AngleBracketRight; + result.id = .AngleBracketRight; break; }, }, - State.AngleBracketAngleBracketRight => switch (c) { + .angle_bracket_angle_bracket_right => switch (c) { '=' => { - result.id = Token.Id.AngleBracketAngleBracketRightEqual; + result.id = .AngleBracketAngleBracketRightEqual; self.index += 1; break; }, else => { - result.id = Token.Id.AngleBracketAngleBracketRight; + result.id = .AngleBracketAngleBracketRight; break; }, }, - State.Period => switch (c) { + .period => switch (c) { '.' => { - state = State.Period2; + state = .period_2; }, '*' => { - result.id = Token.Id.PeriodAsterisk; + result.id = .PeriodAsterisk; self.index += 1; break; }, else => { - result.id = Token.Id.Period; + result.id = .Period; break; }, }, - State.Period2 => switch (c) { + .period_2 => switch (c) { '.' => { - result.id = Token.Id.Ellipsis3; + result.id = .Ellipsis3; self.index += 1; break; }, else => { - result.id = Token.Id.Ellipsis2; + result.id = .Ellipsis2; break; }, }, - State.Slash => switch (c) { + .slash => switch (c) { '/' => { - state = State.LineCommentStart; - result.id = Token.Id.LineComment; + state = .line_comment_start; + result.id = .LineComment; }, '=' => { - result.id = Token.Id.SlashEqual; + result.id = .SlashEqual; self.index += 1; break; }, else => { - result.id = Token.Id.Slash; + result.id = .Slash; break; }, }, - State.LineCommentStart => switch (c) { + .line_comment_start => switch (c) { '/' => { - state = State.DocCommentStart; + state = .doc_comment_start; }, '!' => { - result.id = Token.Id.ContainerDocComment; - state = State.ContainerDocComment; + result.id = .ContainerDocComment; + state = .container_doc_comment; }, '\n' => break, else => { - state = State.LineComment; + state = .line_comment; self.checkLiteralCharacter(); }, }, - State.DocCommentStart => switch (c) { + .doc_comment_start => switch (c) { '/' => { - state = State.LineComment; + state = .line_comment; }, '\n' => { - result.id = Token.Id.DocComment; + result.id = .DocComment; break; }, else => { - state = State.DocComment; - result.id = Token.Id.DocComment; + state = .doc_comment; + result.id = .DocComment; self.checkLiteralCharacter(); }, }, - State.LineComment, State.DocComment, State.ContainerDocComment => switch (c) { + .line_comment, .doc_comment, .container_doc_comment => switch (c) { '\n' => break, else => self.checkLiteralCharacter(), }, - State.Zero => switch (c) { + .zero => switch (c) { 'b' => { - state = State.IntegerLiteralBinNoUnderscore; + state = .int_literal_bin_no_underscore; }, 'o' => { - state = State.IntegerLiteralOctNoUnderscore; + state = .int_literal_oct_no_underscore; }, 'x' => { - state = State.IntegerLiteralHexNoUnderscore; + state = .int_literal_hex_no_underscore; }, '0'...'9', '_', '.', 'e', 'E' => { // reinterpret as a decimal number self.index -= 1; - state = State.IntegerLiteralDec; + state = .int_literal_dec; }, else => { if (isIdentifierChar(c)) { - result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, }, - State.IntegerLiteralBinNoUnderscore => switch (c) { + .int_literal_bin_no_underscore => switch (c) { '0'...'1' => { - state = State.IntegerLiteralBin; + state = .int_literal_bin; }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.IntegerLiteralBin => switch (c) { + .int_literal_bin => switch (c) { '_' => { - state = State.IntegerLiteralBinNoUnderscore; + state = .int_literal_bin_no_underscore; }, '0'...'1' => {}, else => { if (isIdentifierChar(c)) { - 
result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, }, - State.IntegerLiteralOctNoUnderscore => switch (c) { + .int_literal_oct_no_underscore => switch (c) { '0'...'7' => { - state = State.IntegerLiteralOct; + state = .int_literal_oct; }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.IntegerLiteralOct => switch (c) { + .int_literal_oct => switch (c) { '_' => { - state = State.IntegerLiteralOctNoUnderscore; + state = .int_literal_oct_no_underscore; }, '0'...'7' => {}, else => { if (isIdentifierChar(c)) { - result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, }, - State.IntegerLiteralDecNoUnderscore => switch (c) { + .int_literal_dec_no_underscore => switch (c) { '0'...'9' => { - state = State.IntegerLiteralDec; + state = .int_literal_dec; }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.IntegerLiteralDec => switch (c) { + .int_literal_dec => switch (c) { '_' => { - state = State.IntegerLiteralDecNoUnderscore; + state = .int_literal_dec_no_underscore; }, '.' => { - state = State.NumberDotDec; - result.id = Token.Id.FloatLiteral; + state = .num_dot_dec; + result.id = .FloatLiteral; }, 'e', 'E' => { - state = State.FloatExponentUnsigned; - result.id = Token.Id.FloatLiteral; + state = .float_exponent_unsigned; + result.id = .FloatLiteral; }, '0'...'9' => {}, else => { if (isIdentifierChar(c)) { - result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, }, - State.IntegerLiteralHexNoUnderscore => switch (c) { + .int_literal_hex_no_underscore => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => { - state = State.IntegerLiteralHex; + state = .int_literal_hex; }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.IntegerLiteralHex => switch (c) { + .int_literal_hex => switch (c) { '_' => { - state = State.IntegerLiteralHexNoUnderscore; + state = .int_literal_hex_no_underscore; }, '.' => { - state = State.NumberDotHex; - result.id = Token.Id.FloatLiteral; + state = .num_dot_hex; + result.id = .FloatLiteral; }, 'p', 'P' => { - state = State.FloatExponentUnsigned; - result.id = Token.Id.FloatLiteral; + state = .float_exponent_unsigned; + result.id = .FloatLiteral; }, '0'...'9', 'a'...'f', 'A'...'F' => {}, else => { if (isIdentifierChar(c)) { - result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, }, - State.NumberDotDec => switch (c) { + .num_dot_dec => switch (c) { '.' => { self.index -= 1; - state = State.Start; + state = .start; break; }, 'e', 'E' => { - state = State.FloatExponentUnsigned; + state = .float_exponent_unsigned; }, '0'...'9' => { - result.id = Token.Id.FloatLiteral; - state = State.FloatFractionDec; + result.id = .FloatLiteral; + state = .float_fraction_dec; }, else => { if (isIdentifierChar(c)) { - result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, }, - State.NumberDotHex => switch (c) { + .num_dot_hex => switch (c) { '.' 
=> { self.index -= 1; - state = State.Start; + state = .start; break; }, 'p', 'P' => { - state = State.FloatExponentUnsigned; + state = .float_exponent_unsigned; }, '0'...'9', 'a'...'f', 'A'...'F' => { - result.id = Token.Id.FloatLiteral; - state = State.FloatFractionHex; + result.id = .FloatLiteral; + state = .float_fraction_hex; }, else => { if (isIdentifierChar(c)) { - result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, }, - State.FloatFractionDecNoUnderscore => switch (c) { + .float_fraction_dec_no_underscore => switch (c) { '0'...'9' => { - state = State.FloatFractionDec; + state = .float_fraction_dec; }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.FloatFractionDec => switch (c) { + .float_fraction_dec => switch (c) { '_' => { - state = State.FloatFractionDecNoUnderscore; + state = .float_fraction_dec_no_underscore; }, 'e', 'E' => { - state = State.FloatExponentUnsigned; + state = .float_exponent_unsigned; }, '0'...'9' => {}, else => { if (isIdentifierChar(c)) { - result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, }, - State.FloatFractionHexNoUnderscore => switch (c) { + .float_fraction_hex_no_underscore => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => { - state = State.FloatFractionHex; + state = .float_fraction_hex; }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.FloatFractionHex => switch (c) { + .float_fraction_hex => switch (c) { '_' => { - state = State.FloatFractionHexNoUnderscore; + state = .float_fraction_hex_no_underscore; }, 'p', 'P' => { - state = State.FloatExponentUnsigned; + state = .float_exponent_unsigned; }, '0'...'9', 'a'...'f', 'A'...'F' => {}, else => { if (isIdentifierChar(c)) { - result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, }, - State.FloatExponentUnsigned => switch (c) { + .float_exponent_unsigned => switch (c) { '+', '-' => { - state = State.FloatExponentNumberNoUnderscore; + state = .float_exponent_num_no_underscore; }, else => { // reinterpret as a normal exponent number self.index -= 1; - state = State.FloatExponentNumberNoUnderscore; + state = .float_exponent_num_no_underscore; }, }, - State.FloatExponentNumberNoUnderscore => switch (c) { + .float_exponent_num_no_underscore => switch (c) { '0'...'9' => { - state = State.FloatExponentNumber; + state = .float_exponent_num; }, else => { - result.id = Token.Id.Invalid; + result.id = .Invalid; break; }, }, - State.FloatExponentNumber => switch (c) { + .float_exponent_num => switch (c) { '_' => { - state = State.FloatExponentNumberNoUnderscore; + state = .float_exponent_num_no_underscore; }, '0'...'9' => {}, else => { if (isIdentifierChar(c)) { - result.id = Token.Id.Invalid; + result.id = .Invalid; } break; }, @@ -1297,123 +1297,123 @@ pub const Tokenizer = struct { } } else if (self.index == self.buffer.len) { switch (state) { - State.Start, - State.IntegerLiteralDec, - State.IntegerLiteralBin, - State.IntegerLiteralOct, - State.IntegerLiteralHex, - State.NumberDotDec, - State.NumberDotHex, - State.FloatFractionDec, - State.FloatFractionHex, - State.FloatExponentNumber, - State.StringLiteral, // find this error later - State.MultilineStringLiteralLine, - State.Builtin, + .start, + .int_literal_dec, + .int_literal_bin, + .int_literal_oct, + .int_literal_hex, + .num_dot_dec, + .num_dot_hex, + .float_fraction_dec, + .float_fraction_hex, + .float_exponent_num, + .string_literal, // find this error later + .multiline_string_literal_line, + .builtin, => {}, - State.Identifier => { 
+ .identifier => { if (Token.getKeyword(self.buffer[result.start..self.index])) |id| { result.id = id; } }, - State.LineCommentStart, State.LineComment => { - result.id = Token.Id.LineComment; - }, - State.DocComment, State.DocCommentStart => { - result.id = Token.Id.DocComment; - }, - State.ContainerDocComment => { - result.id = Token.Id.ContainerDocComment; - }, - - State.IntegerLiteralDecNoUnderscore, - State.IntegerLiteralBinNoUnderscore, - State.IntegerLiteralOctNoUnderscore, - State.IntegerLiteralHexNoUnderscore, - State.FloatFractionDecNoUnderscore, - State.FloatFractionHexNoUnderscore, - State.FloatExponentNumberNoUnderscore, - State.FloatExponentUnsigned, - State.SawAtSign, - State.Backslash, - State.CharLiteral, - State.CharLiteralBackslash, - State.CharLiteralHexEscape, - State.CharLiteralUnicodeEscapeSawU, - State.CharLiteralUnicodeEscape, - State.CharLiteralUnicodeInvalid, - State.CharLiteralEnd, - State.CharLiteralUnicode, - State.StringLiteralBackslash, + .line_comment, .line_comment_start => { + result.id = .LineComment; + }, + .doc_comment, .doc_comment_start => { + result.id = .DocComment; + }, + .container_doc_comment => { + result.id = .ContainerDocComment; + }, + + .int_literal_dec_no_underscore, + .int_literal_bin_no_underscore, + .int_literal_oct_no_underscore, + .int_literal_hex_no_underscore, + .float_fraction_dec_no_underscore, + .float_fraction_hex_no_underscore, + .float_exponent_num_no_underscore, + .float_exponent_unsigned, + .saw_at_sign, + .backslash, + .char_literal, + .char_literal_backslash, + .char_literal_hex_escape, + .char_literal_unicode_escape_saw_u, + .char_literal_unicode_escape, + .char_literal_unicode_invalid, + .char_literal_end, + .char_literal_unicode, + .string_literal_backslash, => { - result.id = Token.Id.Invalid; + result.id = .Invalid; }, - State.Equal => { - result.id = Token.Id.Equal; + .equal => { + result.id = .Equal; }, - State.Bang => { - result.id = Token.Id.Bang; + .bang => { + result.id = .Bang; }, - State.Minus => { - result.id = Token.Id.Minus; + .minus => { + result.id = .Minus; }, - State.Slash => { - result.id = Token.Id.Slash; + .slash => { + result.id = .Slash; }, - State.Zero => { - result.id = Token.Id.IntegerLiteral; + .zero => { + result.id = .IntegerLiteral; }, - State.Ampersand => { - result.id = Token.Id.Ampersand; + .ampersand => { + result.id = .Ampersand; }, - State.Period => { - result.id = Token.Id.Period; + .period => { + result.id = .Period; }, - State.Period2 => { - result.id = Token.Id.Ellipsis2; + .period_2 => { + result.id = .Ellipsis2; }, - State.Pipe => { - result.id = Token.Id.Pipe; + .pipe => { + result.id = .Pipe; }, - State.AngleBracketAngleBracketRight => { - result.id = Token.Id.AngleBracketAngleBracketRight; + .angle_bracket_angle_bracket_right => { + result.id = .AngleBracketAngleBracketRight; }, - State.AngleBracketRight => { - result.id = Token.Id.AngleBracketRight; + .angle_bracket_right => { + result.id = .AngleBracketRight; }, - State.AngleBracketAngleBracketLeft => { - result.id = Token.Id.AngleBracketAngleBracketLeft; + .angle_bracket_angle_bracket_left => { + result.id = .AngleBracketAngleBracketLeft; }, - State.AngleBracketLeft => { - result.id = Token.Id.AngleBracketLeft; + .angle_bracket_left => { + result.id = .AngleBracketLeft; }, - State.PlusPercent => { - result.id = Token.Id.PlusPercent; + .plus_percent => { + result.id = .PlusPercent; }, - State.Plus => { - result.id = Token.Id.Plus; + .plus => { + result.id = .Plus; }, - State.Percent => { - result.id = Token.Id.Percent; + 
.percent => { + result.id = .Percent; }, - State.Caret => { - result.id = Token.Id.Caret; + .caret => { + result.id = .Caret; }, - State.AsteriskPercent => { - result.id = Token.Id.AsteriskPercent; + .asterisk_percent => { + result.id = .AsteriskPercent; }, - State.Asterisk => { - result.id = Token.Id.Asterisk; + .asterisk => { + result.id = .Asterisk; }, - State.MinusPercent => { - result.id = Token.Id.MinusPercent; + .minus_percent => { + result.id = .MinusPercent; }, } } - if (result.id == Token.Id.Eof) { + if (result.id == .Eof) { if (self.pending_invalid_token) |token| { self.pending_invalid_token = null; return token; @@ -1428,8 +1428,8 @@ pub const Tokenizer = struct { if (self.pending_invalid_token != null) return; const invalid_length = self.getInvalidCharacterLength(); if (invalid_length == 0) return; - self.pending_invalid_token = Token{ - .id = Token.Id.Invalid, + self.pending_invalid_token = .{ + .id = .Invalid, .start = self.index, .end = self.index + invalid_length, }; @@ -1474,7 +1474,7 @@ pub const Tokenizer = struct { }; test "tokenizer" { - testTokenize("test", &[_]Token.Id{Token.Id.Keyword_test}); + testTokenize("test", &[_]Token.Id{.Keyword_test}); } test "tokenizer - unknown length pointer and then c pointer" { @@ -1482,15 +1482,15 @@ test "tokenizer - unknown length pointer and then c pointer" { \\[*]u8 \\[*c]u8 , &[_]Token.Id{ - Token.Id.LBracket, - Token.Id.Asterisk, - Token.Id.RBracket, - Token.Id.Identifier, - Token.Id.LBracket, - Token.Id.Asterisk, - Token.Id.Identifier, - Token.Id.RBracket, - Token.Id.Identifier, + .LBracket, + .Asterisk, + .RBracket, + .Identifier, + .LBracket, + .Asterisk, + .Identifier, + .RBracket, + .Identifier, }); } @@ -1561,125 +1561,125 @@ test "tokenizer - char literal with unicode code point" { test "tokenizer - float literal e exponent" { testTokenize("a = 4.94065645841246544177e-324;\n", &[_]Token.Id{ - Token.Id.Identifier, - Token.Id.Equal, - Token.Id.FloatLiteral, - Token.Id.Semicolon, + .Identifier, + .Equal, + .FloatLiteral, + .Semicolon, }); } test "tokenizer - float literal p exponent" { testTokenize("a = 0x1.a827999fcef32p+1022;\n", &[_]Token.Id{ - Token.Id.Identifier, - Token.Id.Equal, - Token.Id.FloatLiteral, - Token.Id.Semicolon, + .Identifier, + .Equal, + .FloatLiteral, + .Semicolon, }); } test "tokenizer - chars" { - testTokenize("'c'", &[_]Token.Id{Token.Id.CharLiteral}); + testTokenize("'c'", &[_]Token.Id{.CharLiteral}); } test "tokenizer - invalid token characters" { - testTokenize("#", &[_]Token.Id{Token.Id.Invalid}); - testTokenize("`", &[_]Token.Id{Token.Id.Invalid}); - testTokenize("'c", &[_]Token.Id{Token.Id.Invalid}); - testTokenize("'", &[_]Token.Id{Token.Id.Invalid}); - testTokenize("''", &[_]Token.Id{ Token.Id.Invalid, Token.Id.Invalid }); + testTokenize("#", &[_]Token.Id{.Invalid}); + testTokenize("`", &[_]Token.Id{.Invalid}); + testTokenize("'c", &[_]Token.Id{.Invalid}); + testTokenize("'", &[_]Token.Id{.Invalid}); + testTokenize("''", &[_]Token.Id{ .Invalid, .Invalid }); } test "tokenizer - invalid literal/comment characters" { testTokenize("\"\x00\"", &[_]Token.Id{ - Token.Id.StringLiteral, - Token.Id.Invalid, + .StringLiteral, + .Invalid, }); testTokenize("//\x00", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\x1f", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\x7f", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); } test "tokenizer - utf8" { 
- testTokenize("//\xc2\x80", &[_]Token.Id{Token.Id.LineComment}); - testTokenize("//\xf4\x8f\xbf\xbf", &[_]Token.Id{Token.Id.LineComment}); + testTokenize("//\xc2\x80", &[_]Token.Id{.LineComment}); + testTokenize("//\xf4\x8f\xbf\xbf", &[_]Token.Id{.LineComment}); } test "tokenizer - invalid utf8" { testTokenize("//\x80", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\xbf", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\xf8", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\xff", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\xc2\xc0", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\xe0", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\xf0", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\xf0\x90\x80\xc0", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); } test "tokenizer - illegal unicode codepoints" { // unicode newline characters.U+0085, U+2028, U+2029 - testTokenize("//\xc2\x84", &[_]Token.Id{Token.Id.LineComment}); + testTokenize("//\xc2\x84", &[_]Token.Id{.LineComment}); testTokenize("//\xc2\x85", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); - testTokenize("//\xc2\x86", &[_]Token.Id{Token.Id.LineComment}); - testTokenize("//\xe2\x80\xa7", &[_]Token.Id{Token.Id.LineComment}); + testTokenize("//\xc2\x86", &[_]Token.Id{.LineComment}); + testTokenize("//\xe2\x80\xa7", &[_]Token.Id{.LineComment}); testTokenize("//\xe2\x80\xa8", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); testTokenize("//\xe2\x80\xa9", &[_]Token.Id{ - Token.Id.LineComment, - Token.Id.Invalid, + .LineComment, + .Invalid, }); - testTokenize("//\xe2\x80\xaa", &[_]Token.Id{Token.Id.LineComment}); + testTokenize("//\xe2\x80\xaa", &[_]Token.Id{.LineComment}); } test "tokenizer - string identifier and builtin fns" { testTokenize( \\const @"if" = @import("std"); , &[_]Token.Id{ - Token.Id.Keyword_const, - Token.Id.Identifier, - Token.Id.Equal, - Token.Id.Builtin, - Token.Id.LParen, - Token.Id.StringLiteral, - Token.Id.RParen, - Token.Id.Semicolon, + .Keyword_const, + .Identifier, + .Equal, + .Builtin, + .LParen, + .StringLiteral, + .RParen, + .Semicolon, }); } @@ -1687,26 +1687,26 @@ test "tokenizer - multiline string literal with literal tab" { testTokenize( \\\\foo bar , &[_]Token.Id{ - Token.Id.MultilineStringLiteralLine, + .MultilineStringLiteralLine, }); } test "tokenizer - pipe and then invalid" { testTokenize("||=", &[_]Token.Id{ - Token.Id.PipePipe, - Token.Id.Equal, + .PipePipe, + .Equal, }); } test "tokenizer - line comment and doc comment" { - testTokenize("//", &[_]Token.Id{Token.Id.LineComment}); - testTokenize("// a / b", &[_]Token.Id{Token.Id.LineComment}); - testTokenize("// /", &[_]Token.Id{Token.Id.LineComment}); - testTokenize("/// a", &[_]Token.Id{Token.Id.DocComment}); - testTokenize("///", &[_]Token.Id{Token.Id.DocComment}); - testTokenize("////", &[_]Token.Id{Token.Id.LineComment}); - testTokenize("//!", &[_]Token.Id{Token.Id.ContainerDocComment}); - testTokenize("//!!", &[_]Token.Id{Token.Id.ContainerDocComment}); + testTokenize("//", &[_]Token.Id{.LineComment}); + 
testTokenize("// a / b", &[_]Token.Id{.LineComment}); + testTokenize("// /", &[_]Token.Id{.LineComment}); + testTokenize("/// a", &[_]Token.Id{.DocComment}); + testTokenize("///", &[_]Token.Id{.DocComment}); + testTokenize("////", &[_]Token.Id{.LineComment}); + testTokenize("//!", &[_]Token.Id{.ContainerDocComment}); + testTokenize("//!!", &[_]Token.Id{.ContainerDocComment}); } test "tokenizer - line comment followed by identifier" { @@ -1715,28 +1715,28 @@ test "tokenizer - line comment followed by identifier" { \\ // another \\ Another, , &[_]Token.Id{ - Token.Id.Identifier, - Token.Id.Comma, - Token.Id.LineComment, - Token.Id.Identifier, - Token.Id.Comma, + .Identifier, + .Comma, + .LineComment, + .Identifier, + .Comma, }); } test "tokenizer - UTF-8 BOM is recognized and skipped" { testTokenize("\xEF\xBB\xBFa;\n", &[_]Token.Id{ - Token.Id.Identifier, - Token.Id.Semicolon, + .Identifier, + .Semicolon, }); } test "correctly parse pointer assignment" { testTokenize("b.*=3;\n", &[_]Token.Id{ - Token.Id.Identifier, - Token.Id.PeriodAsterisk, - Token.Id.Equal, - Token.Id.IntegerLiteral, - Token.Id.Semicolon, + .Identifier, + .PeriodAsterisk, + .Equal, + .IntegerLiteral, + .Semicolon, }); } @@ -1979,5 +1979,5 @@ fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void { } } const last_token = tokenizer.next(); - std.testing.expect(last_token.id == Token.Id.Eof); + std.testing.expect(last_token.id == .Eof); } -- cgit v1.2.3 From ad71d959d7d1b5f3a771dc2c1eaf37dbd7cd0852 Mon Sep 17 00:00:00 2001 From: Vexu Date: Wed, 13 May 2020 23:28:04 +0300 Subject: correctly recover from invalid top level declarations --- lib/std/zig/parse.zig | 20 +++++++++++++------- lib/std/zig/parser_test.zig | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 6052d92d6a..d8d1d4b427 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -130,7 +130,13 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All const visib_token = eatToken(it, .Keyword_pub); - if (try parseTopLevelDecl(arena, it, tree)) |node| { + if (parseTopLevelDecl(arena, it, tree) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => { + // try again + continue; + }, + }) |node| { if (field_state == .seen) { field_state = .{ .end = visib_token orelse node.firstToken() }; } @@ -315,7 +321,7 @@ fn parseTopLevelComptime(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?* /// <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block) /// / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? VarDecl /// / KEYWORD_usingnamespace Expr SEMICOLON -fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!?*Node { +fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!?*Node { var lib_name: ?*Node = null; const extern_export_inline_token = blk: { if (eatToken(it, .Keyword_export)) |token| break :blk token; @@ -334,7 +340,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocat // this fn will likely have a body so we // use findEndOfBlock instead of findToken. 
findEndOfBlock(it); - return null; + return error.ParseError; }, }) |node| { const fn_node = node.cast(Node.FnProto).?; @@ -373,7 +379,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocat error.ParseError => { // try to skip to next decl findToken(it, .Semicolon); - return null; + return error.ParseError; }, }) |node| { var var_decl = node.cast(Node.VarDecl).?; @@ -388,8 +394,8 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocat try tree.errors.push(.{ .ExpectedVarDecl = .{ .token = it.index }, }); - // ignore this, try to find next decl by skipping the next block - findEndOfBlock(it); + // ignore this and try again; + return error.ParseError; } if (extern_export_inline_token) |token| { @@ -404,7 +410,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocat error.ParseError => { // try to skip to next decl findToken(it, .Semicolon); - return null; + return error.ParseError; }, }; } diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index e3bc20bd10..7ddf9cc9c8 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -62,6 +62,28 @@ test "recovery: missing return type" { }); } +test "recovery: continue after invalid decl" { + try testError( + \\fn foo { + \\ inline; + \\} + \\test "" { + \\ a && b; + \\} + , &[_]Error{ + .ExpectedToken, + .InvalidAnd, + }); + try testError( + \\threadlocal test "" { + \\ a && b; + \\} + , &[_]Error{ + .ExpectedVarDecl, + .InvalidAnd, + }); +} + test "zig fmt: top-level fields" { try testCanonical( \\a: did_you_know, -- cgit v1.2.3 From 1f81887a7863545d8b89bff6cc7d31941da0abf0 Mon Sep 17 00:00:00 2001 From: Vexu Date: Wed, 13 May 2020 23:35:58 +0300 Subject: recover after invalid inline/extern --- lib/std/zig/ast.zig | 4 ++++ lib/std/zig/parse.zig | 17 ++++++++++------- lib/std/zig/parser_test.zig | 15 +++++++++++++++ 3 files changed, 29 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig index afb7fce23e..26bff31bbd 100644 --- a/lib/std/zig/ast.zig +++ b/lib/std/zig/ast.zig @@ -129,6 +129,7 @@ pub const Error = union(enum) { ExpectedStatement: ExpectedStatement, ExpectedVarDeclOrFn: ExpectedVarDeclOrFn, ExpectedVarDecl: ExpectedVarDecl, + ExpectedFn: ExpectedFn, ExpectedReturnType: ExpectedReturnType, ExpectedAggregateKw: ExpectedAggregateKw, UnattachedDocComment: UnattachedDocComment, @@ -179,6 +180,7 @@ pub const Error = union(enum) { .ExpectedStatement => |*x| return x.render(tokens, stream), .ExpectedVarDeclOrFn => |*x| return x.render(tokens, stream), .ExpectedVarDecl => |*x| return x.render(tokens, stream), + .ExpectedFn => |*x| return x.render(tokens, stream), .ExpectedReturnType => |*x| return x.render(tokens, stream), .ExpectedAggregateKw => |*x| return x.render(tokens, stream), .UnattachedDocComment => |*x| return x.render(tokens, stream), @@ -231,6 +233,7 @@ pub const Error = union(enum) { .ExpectedStatement => |x| return x.token, .ExpectedVarDeclOrFn => |x| return x.token, .ExpectedVarDecl => |x| return x.token, + .ExpectedFn => |x| return x.token, .ExpectedReturnType => |x| return x.token, .ExpectedAggregateKw => |x| return x.token, .UnattachedDocComment => |x| return x.token, @@ -280,6 +283,7 @@ pub const Error = union(enum) { pub const ExpectedStatement = SingleTokenError("Expected statement, found '{}'"); pub const ExpectedVarDeclOrFn = SingleTokenError("Expected variable declaration or function, found '{}'"); pub const ExpectedVarDecl = 
SingleTokenError("Expected variable declaration, found '{}'"); + pub const ExpectedFn = SingleTokenError("Expected function, found '{}'"); pub const ExpectedReturnType = SingleTokenError("Expected 'var' or return type expression, found '{}'"); pub const ExpectedAggregateKw = SingleTokenError("Expected '" ++ Token.Id.Keyword_struct.symbol() ++ "', '" ++ Token.Id.Keyword_union.symbol() ++ "', or '" ++ Token.Id.Keyword_enum.symbol() ++ "', found '{}'"); pub const ExpectedEqOrSemi = SingleTokenError("Expected '=' or ';', found '{}'"); diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index d8d1d4b427..cbd5336ca2 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -360,15 +360,17 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!? try tree.errors.push(.{ .ExpectedSemiOrLBrace = .{ .token = it.index }, }); - return null; + return error.ParseError; } if (extern_export_inline_token) |token| { if (tree.tokens.at(token).id == .Keyword_inline or tree.tokens.at(token).id == .Keyword_noinline) { - putBackToken(it, token); - return null; + try tree.errors.push(.{ + .ExpectedFn = .{ .token = it.index }, + }); + return error.ParseError; } } @@ -399,10 +401,11 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!? } if (extern_export_inline_token) |token| { - if (lib_name) |string_literal_node| - putBackToken(it, string_literal_node.cast(Node.StringLiteral).?.token); - putBackToken(it, token); - return null; + try tree.errors.push(.{ + .ExpectedVarDeclOrFn = .{ .token = it.index }, + }); + // ignore this and try again; + return error.ParseError; } return parseUse(arena, it, tree) catch |err| switch (err) { diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 7ddf9cc9c8..d837769802 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -84,6 +84,21 @@ test "recovery: continue after invalid decl" { }); } +test "recovery: invalid extern/inline" { + try testError( + \\inline test "" { a && b; } + , &[_]Error{ + .ExpectedFn, + .InvalidAnd, + }); + try testError( + \\extern "" test "" { a && b; } + , &[_]Error{ + .ExpectedVarDeclOrFn, + .InvalidAnd, + }); +} + test "zig fmt: top-level fields" { try testCanonical( \\a: did_you_know, -- cgit v1.2.3 From c3b76d091337ba60d2ccb3632b525e5a42d1f2c5 Mon Sep 17 00:00:00 2001 From: Vexu Date: Thu, 14 May 2020 00:16:56 +0300 Subject: recover from invalid builtin/async call --- lib/std/zig/ast.zig | 1 + lib/std/zig/parse.zig | 18 +++++++++++++----- lib/std/zig/parser_test.zig | 9 ++++++--- 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig index 26bff31bbd..6fb1ce012b 100644 --- a/lib/std/zig/ast.zig +++ b/lib/std/zig/ast.zig @@ -926,6 +926,7 @@ pub const Node = struct { if (i < 1) return node; i -= 1; }, + .Invalid => unreachable, } if (self.body_node) |body_node| { diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index cbd5336ca2..6c664bbda2 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -172,6 +172,7 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All .ExpectedPubItem = .{ .token = it.index }, }); // ignore this pub + continue; } if (parseContainerField(arena, it, tree) catch |err| switch (err) { @@ -1017,7 +1018,7 @@ fn parsePrimaryExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node const node = try arena.create(Node.ControlFlowExpression); node.* = .{ .ltoken = token, - .kind = 
Node.ControlFlowExpression.Kind{ .Break = label },
+ .kind = .{ .Break = label },
 .rhs = expr_node,
 };
 return &node.base;
@@ -1053,7 +1054,7 @@ fn parsePrimaryExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node
 const node = try arena.create(Node.ControlFlowExpression);
 node.* = .{
 .ltoken = token,
- .kind = Node.ControlFlowExpression.Kind{ .Continue = label },
+ .kind = .{ .Continue = label },
 .rhs = null,
 };
 return &node.base;
@@ -1077,7 +1078,7 @@ fn parsePrimaryExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node
 const node = try arena.create(Node.ControlFlowExpression);
 node.* = .{
 .ltoken = token,
- .kind = Node.ControlFlowExpression.Kind.Return,
+ .kind = .Return,
 .rhs = expr_node,
 };
 return &node.base;
@@ -1322,7 +1323,8 @@ fn parseSuffixExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
 try tree.errors.push(.{
 .ExpectedParamList = .{ .token = it.index },
 });
- return null;
+ // ignore this, continue parsing
+ return res;
 };
 const node = try arena.create(Node.SuffixOp);
 node.* = .{
@@ -2908,7 +2910,13 @@ fn parseBuiltinCall(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node
 try tree.errors.push(.{
 .ExpectedParamList = .{ .token = it.index },
 });
- return error.ParseError;
+
+ // lets pretend this was an identifier so we can continue parsing
+ const node = try arena.create(Node.Identifier);
+ node.* = .{
+ .token = token,
+ };
+ return &node.base;
 };
 const node = try arena.create(Node.BuiltinCall);
 node.* = .{
diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig
index d837769802..34c61b6bb4 100644
--- a/lib/std/zig/parser_test.zig
+++ b/lib/std/zig/parser_test.zig
@@ -67,19 +67,22 @@ test "recovery: continue after invalid decl" {
 \\fn foo {
 \\ inline;
 \\}
- \\test "" {
- \\ a && b;
+ \\pub test "" {
+ \\ async a && b;
 \\}
 , &[_]Error{
 .ExpectedToken,
+ .ExpectedPubItem,
+ .ExpectedParamList,
 .InvalidAnd,
 });
 try testError(
 \\threadlocal test "" {
- \\ a && b;
+ \\ @a && b;
 \\}
 , &[_]Error{
 .ExpectedVarDecl,
+ .ExpectedParamList,
 .InvalidAnd,
 });
 }
-- 
cgit v1.2.3


From ac319b27348f60e37f3f1b478fee00202280715f Mon Sep 17 00:00:00 2001
From: Vexu 
Date: Thu, 14 May 2020 11:19:14 +0300
Subject: remove useless nosuspend parsing

nosuspend cannot be used in a type expression and all other use cases
are covered by PrimaryExpr
---
 doc/langref.html.in | 3 +++
 lib/std/zig/parse.zig | 10 ----------
 src/parser.cpp | 9 ---------
 3 files changed, 3 insertions(+), 19 deletions(-)

(limited to 'lib')

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 5bf91ca70f..0733540478 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -10096,6 +10096,7 @@ ContainerField <- IDENTIFIER (COLON TypeExpr)? (EQUAL Expr)?
 Statement
 <- KEYWORD_comptime? VarDecl
 / KEYWORD_comptime BlockExprStatement
+ / KEYWORD_nosuspend BlockExprStatement
 / KEYWORD_suspend (SEMICOLON / BlockExprStatement)
 / KEYWORD_defer BlockExprStatement
 / KEYWORD_errdefer BlockExprStatement
@@ -10152,6 +10153,7 @@ PrimaryExpr
 / IfExpr
 / KEYWORD_break BreakLabel? Expr?
 / KEYWORD_comptime Expr
+ / KEYWORD_nosuspend Expr
 / KEYWORD_continue BreakLabel?
 / KEYWORD_resume Expr
 / KEYWORD_return Expr?
@@ -10514,6 +10516,7 @@ KEYWORD_for <- 'for' end_of_word KEYWORD_if <- 'if' end_of_word KEYWORD_inline <- 'inline' end_of_word KEYWORD_noalias <- 'noalias' end_of_word +KEYWORD_nosuspend <- 'nosuspend' end_of_word KEYWORD_null <- 'null' end_of_word KEYWORD_or <- 'or' end_of_word KEYWORD_orelse <- 'orelse' end_of_word diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 6c664bbda2..c16dba4a11 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -1389,7 +1389,6 @@ fn parseSuffixExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { /// / IfTypeExpr /// / INTEGER /// / KEYWORD_comptime TypeExpr -/// / KEYWORD_nosuspend TypeExpr /// / KEYWORD_error DOT IDENTIFIER /// / KEYWORD_false /// / KEYWORD_null @@ -1428,15 +1427,6 @@ fn parsePrimaryTypeExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*N }; return &node.base; } - if (eatToken(it, .Keyword_nosuspend)) |token| { - const expr = (try parseTypeExpr(arena, it, tree)) orelse return null; - const node = try arena.create(Node.Nosuspend); - node.* = .{ - .nosuspend_token = token, - .expr = expr, - }; - return &node.base; - } if (eatToken(it, .Keyword_error)) |token| { const period = try expectToken(it, tree, .Period); const identifier = try expectNode(arena, it, tree, parseIdentifier, .{ diff --git a/src/parser.cpp b/src/parser.cpp index 1a929cd1b1..fc9814f393 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -1609,7 +1609,6 @@ static AstNode *ast_parse_suffix_expr(ParseContext *pc) { // / IfTypeExpr // / INTEGER // / KEYWORD_comptime TypeExpr -// / KEYWORD_nosuspend TypeExpr // / KEYWORD_error DOT IDENTIFIER // / KEYWORD_false // / KEYWORD_null @@ -1711,14 +1710,6 @@ static AstNode *ast_parse_primary_type_expr(ParseContext *pc) { return res; } - Token *nosuspend = eat_token_if(pc, TokenIdKeywordNoSuspend); - if (nosuspend != nullptr) { - AstNode *expr = ast_expect(pc, ast_parse_type_expr); - AstNode *res = ast_create_node(pc, NodeTypeNoSuspend, nosuspend); - res->data.nosuspend_expr.expr = expr; - return res; - } - Token *error = eat_token_if(pc, TokenIdKeywordError); if (error != nullptr) { Token *dot = expect_token(pc, TokenIdDot); -- cgit v1.2.3 From 89f2923a8aad5119b1506c3b8e08464e132c228e Mon Sep 17 00:00:00 2001 From: Vexu Date: Thu, 14 May 2020 11:19:50 +0300 Subject: recover from missing semicolon --- lib/std/zig/parse.zig | 16 +++++++++++++--- lib/std/zig/parser_test.zig | 17 +++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index c16dba4a11..24bb164685 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -667,7 +667,12 @@ fn parseStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!?*No if (try parseLabeledStatement(arena, it, tree)) |node| return node; if (try parseSwitchExpr(arena, it, tree)) |node| return node; if (try parseAssignExpr(arena, it, tree)) |node| { - _ = try expectToken(it, tree, .Semicolon); + _ = eatToken(it, .Semicolon) orelse { + try tree.errors.push(.{ + .ExpectedToken = .{ .token = it.index, .expected_id = .Semicolon }, + }); + // pretend we saw a semicolon and continue parsing + }; return node; } @@ -911,7 +916,12 @@ fn parseWhileStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*No fn parseBlockExprStatement(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { if (try parseBlockExpr(arena, it, tree)) |node| return node; if (try parseAssignExpr(arena, it, tree)) |node| { - _ = try expectToken(it, tree, .Semicolon); + _ = 
eatToken(it, .Semicolon) orelse {
+ try tree.errors.push(.{
+ .ExpectedToken = .{ .token = it.index, .expected_id = .Semicolon },
+ });
+ // pretend we saw a semicolon and continue parsing
+ };
 return node;
 }
 return null;
}
@@ -3072,7 +3082,7 @@ fn parseDocComment(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node.D
 }
 
 /// Eat a single-line doc comment on the same line as another node
-fn parseAppendedDocComment(arena: *Allocator, it: *TokenIterator, tree: *Tree, after_token: TokenIndex) Allocator.Error!?*Node.DocComment {
+fn parseAppendedDocComment(arena: *Allocator, it: *TokenIterator, tree: *Tree, after_token: TokenIndex) !?*Node.DocComment {
 const comment_token = eatToken(it, .DocComment) orelse return null;
 if (tree.tokensOnSameLine(after_token, comment_token)) {
 const node = try arena.create(Node.DocComment);
diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig
index 34c61b6bb4..a367c2ff0b 100644
--- a/lib/std/zig/parser_test.zig
+++ b/lib/std/zig/parser_test.zig
@@ -102,6 +102,23 @@ test "recovery: invalid extern/inline" {
 });
 }
 
+test "recovery: missing semicolon" {
+ try testError(
+ \\test "" {
+ \\ comptime a && b
+ \\ c && d
+ \\ @foo
+ \\}
+ , &[_]Error{
+ .InvalidAnd,
+ .ExpectedToken,
+ .InvalidAnd,
+ .ExpectedToken,
+ .ExpectedParamList,
+ .ExpectedToken,
+ });
+}
+
 test "zig fmt: top-level fields" {
 try testCanonical(
 \\a: did_you_know,
-- 
cgit v1.2.3


From a32e240540895af536fdeb98db448ee745def59a Mon Sep 17 00:00:00 2001
From: Vexu 
Date: Thu, 14 May 2020 12:09:40 +0300
Subject: improve recovery from invalid container members

Instead of trying to find the end of the block or the next
comma/semicolon we now try to find the next token that can start a
container member.
---
 lib/std/zig/parse.zig | 98 ++++++++++++++++++++++++++++++++++---------
 lib/std/zig/parser_test.zig | 21 ++++++++++
 2 files changed, 99 insertions(+), 20 deletions(-)

(limited to 'lib')

diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig
index 24bb164685..b4b80669a1 100644
--- a/lib/std/zig/parse.zig
+++ b/lib/std/zig/parse.zig
@@ -59,10 +59,13 @@ fn parseRoot(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error
 node.* = .{
 .decls = try parseContainerMembers(arena, it, tree),
 .eof_token = eatToken(it, .Eof) orelse blk: {
+ // parseContainerMembers will try to skip as much
+ // invalid tokens as it can so this can only be a '}'
+ const tok = eatToken(it, .RBrace).?;
 try tree.errors.push(.{
- .ExpectedContainerMembers = .{ .token = it.index },
+ .ExpectedContainerMembers = .{ .token = tok },
 });
- break :blk undefined;
+ break :blk tok;
 },
 };
 return node;
@@ -101,7 +104,7 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All
 if (parseTestDecl(arena, it, tree) catch |err| switch (err) {
 error.OutOfMemory => return error.OutOfMemory,
 error.ParseError => {
- findEndOfBlock(it);
+ findNextContainerMember(it);
 continue;
 },
 }) |node| {
@@ -116,7 +119,7 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All
 if (parseTopLevelComptime(arena, it, tree) catch |err| switch (err) {
 error.OutOfMemory => return error.OutOfMemory,
 error.ParseError => {
- findEndOfBlock(it);
+ findNextContainerMember(it);
 continue;
 },
 }) |node| {
@@ -178,8 +181,8 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All
 if (parseContainerField(arena, it, tree) catch |err| switch (err) {
 error.OutOfMemory => return error.OutOfMemory,
 error.ParseError => {
- // attempt to recover by finding a comma
- findToken(it,
.Comma); + // attempt to recover + findNextContainerMember(it); continue; }, }) |node| { @@ -198,7 +201,21 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All const field = node.cast(Node.ContainerField).?; field.doc_comments = doc_comments; try list.push(node); - const comma = eatToken(it, .Comma) orelse break; + const comma = eatToken(it, .Comma) orelse { + // try to continue parsing + const index = it.index; + findNextContainerMember(it); + switch (it.peek().?.id) { + .Eof, .RBrace => break, + else => { + // add error and continue + try tree.errors.push(.{ + .ExpectedToken = .{ .token = index, .expected_id = .Comma }, + }); + continue; + } + } + }; if (try parseAppendedDocComment(arena, it, tree, comma)) |appended_comment| field.doc_comments = appended_comment; continue; @@ -210,22 +227,63 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All .UnattachedDocComment = .{ .token = doc_comments.?.firstToken() }, }); } - break; + + switch (it.peek().?.id) { + .Eof, .RBrace => break, + else => { + // this was likely not supposed to end yet, + // try to find the next declaration + const index = it.index; + findNextContainerMember(it); + try tree.errors.push(.{ + .ExpectedContainerMembers = .{ .token = index }, + }); + }, + } } return list; } -/// Attempts to find a closing brace. -fn findEndOfBlock(it: *TokenIterator) void { - var count: u32 = 0; +fn findNextContainerMember(it: *TokenIterator) void { + var level: u32 = 0; while (true) { const tok = nextToken(it); switch (tok.ptr.id) { - .LBrace => count += 1, - .RBrace => { - if (count <= 1) return; - count -= 1; + // any of these can start a new top level declaration + .Keyword_test, + .Keyword_comptime, + .Keyword_pub, + .Keyword_export, + .Keyword_extern, + .Keyword_inline, + .Keyword_noinline, + .Keyword_usingnamespace, + .Keyword_threadlocal, + .Keyword_const, + .Keyword_var, + .Keyword_fn, + .Identifier, + => { + if (level == 0) { + putBackToken(it, tok.index); + return; + } + }, + .Comma, .Semicolon => { + // this decl was likely meant to end here + if (level == 0) { + return; + } + }, + .LParen, .LBracket, .LBrace => level += 1, + .RParen, .RBracket, .RBrace => { + if (level == 0) { + // end of container, exit + putBackToken(it, tok.index); + return; + } + level -= 1; }, .Eof => { putBackToken(it, tok.index); @@ -338,9 +396,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!? if (parseFnProto(arena, it, tree) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, error.ParseError => { - // this fn will likely have a body so we - // use findEndOfBlock instead of findToken. - findEndOfBlock(it); + findNextContainerMember(it); return error.ParseError; }, }) |node| { @@ -381,7 +437,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!? error.OutOfMemory => return error.OutOfMemory, error.ParseError => { // try to skip to next decl - findToken(it, .Semicolon); + findNextContainerMember(it); return error.ParseError; }, }) |node| { @@ -413,7 +469,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!? 
error.OutOfMemory => return error.OutOfMemory, error.ParseError => { // try to skip to next decl - findToken(it, .Semicolon); + findNextContainerMember(it); return error.ParseError; }, }; @@ -3215,6 +3271,8 @@ fn expectToken(it: *TokenIterator, tree: *Tree, id: Token.Id) Error!TokenIndex { try tree.errors.push(.{ .ExpectedToken = .{ .token = token.index, .expected_id = id }, }); + // go back so that we can recover properly + putBackToken(it, token.index); return error.ParseError; } return token.index; diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index a367c2ff0b..8cbf2d4610 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -119,6 +119,25 @@ test "recovery: missing semicolon" { }); } +test "recovery: invalid container members" { + try testError( + \\usingnamespace; + \\foo+ + \\bar@, + \\while (a == 2) { test "" {}} + \\test "" { + \\ a && b + \\} + , &[_]Error{ + .ExpectedExpr, + .ExpectedToken, + .ExpectedToken, + .ExpectedContainerMembers, + .InvalidAnd, + .ExpectedToken, + }); +} + test "zig fmt: top-level fields" { try testCanonical( \\a: did_you_know, @@ -2953,6 +2972,8 @@ test "zig fmt: extern without container keyword returns error" { \\ , &[_]Error{ .ExpectedExpr, + .ExpectedVarDeclOrFn, + .ExpectedContainerMembers, }); } -- cgit v1.2.3 From c4552ee8edec961defb161e1b828a818f8d407bb Mon Sep 17 00:00:00 2001 From: Vexu Date: Thu, 14 May 2020 17:18:14 +0300 Subject: store rparen in ReturnType.Invalid This is useful for getting a partial function signature --- lib/std/zig/ast.zig | 2 +- lib/std/zig/parse.zig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig index 6fb1ce012b..351162bec0 100644 --- a/lib/std/zig/ast.zig +++ b/lib/std/zig/ast.zig @@ -897,7 +897,7 @@ pub const Node = struct { pub const ReturnType = union(enum) { Explicit: *Node, InferErrorSet: *Node, - Invalid, + Invalid: TokenIndex, }; pub fn iterate(self: *FnProto, index: usize) ?*Node { diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index b4b80669a1..98b1e9d329 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -518,7 +518,7 @@ fn parseFnProto(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { // TODO https://github.com/ziglang/zig/issues/3750 const R = Node.FnProto.ReturnType; const return_type = if (return_type_expr == null) - R{ .Invalid = {} } + R{ .Invalid = rparen } else if (exclamation_token != null) R{ .InferErrorSet = return_type_expr.? 
} else
-- 
cgit v1.2.3


From c77fee03448817366a2cda98915bc31ba8f2b451 Mon Sep 17 00:00:00 2001
From: Vexu 
Date: Thu, 14 May 2020 19:56:55 +0300
Subject: fix infinite loop

findToken wasn't as generic as I thought it was
---
 lib/std/zig/parse.zig | 28 ++++++++++++++++------------
 lib/std/zig/parser_test.zig | 10 ++++++++++
 2 files changed, 26 insertions(+), 12 deletions(-)

(limited to 'lib')

diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig
index 98b1e9d329..74945b1b33 100644
--- a/lib/std/zig/parse.zig
+++ b/lib/std/zig/parse.zig
@@ -213,7 +213,7 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All
 .ExpectedToken = .{ .token = index, .expected_id = .Comma },
 });
 continue;
- }
+ },
 }
 };
 if (try parseAppendedDocComment(arena, it, tree, comma)) |appended_comment|
@@ -245,6 +245,7 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All
 return list;
 }
 
+/// Attempts to find next container member by searching for certain tokens
 fn findNextContainerMember(it: *TokenIterator) void {
 var level: u32 = 0;
 while (true) {
@@ -294,27 +295,30 @@ fn findNextContainerMember(it: *TokenIterator) void {
 }
 }
 
-/// Attempts to find `wanted` token, keeps track of parentheses.
-fn findToken(it: *TokenIterator, wanted: Token.Id) void {
- var count: u32 = 0;
+/// Attempts to find the next statement by searching for a semicolon
+fn findNextStmt(it: *TokenIterator) void {
+ var level: u32 = 0;
 while (true) {
 const tok = nextToken(it);
 switch (tok.ptr.id) {
- .LParen, .LBracket, .LBrace => count += 1,
- .RParen, .RBracket, .RBrace => {
- if (count == 0) {
+ .LBrace => level += 1,
+ .RBrace => {
+ if (level == 0) {
 putBackToken(it, tok.index);
 return;
 }
- count -= 1;
+ level -= 1;
+ },
+ .Semicolon => {
+ if (level == 0) {
+ return;
+ }
 },
 .Eof => {
 putBackToken(it, tok.index);
 return;
 },
- else => {
- if (tok.ptr.id == wanted and count == 0) return;
- },
+ else => {},
 }
 }
 }
@@ -1186,7 +1190,7 @@ fn parseBlock(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
 error.OutOfMemory => return error.OutOfMemory,
 error.ParseError => {
 // try to skip to the next statement
- findToken(it, .Semicolon);
+ findNextStmt(it);
 continue;
 },
 }) orelse break;
diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig
index 8cbf2d4610..f92cc5a222 100644
--- a/lib/std/zig/parser_test.zig
+++ b/lib/std/zig/parser_test.zig
@@ -138,6 +138,16 @@ test "recovery: invalid container members" {
 });
 }
 
+test "recovery: invalid parameter" {
+ try testError(
+ \\fn main() void {
+ \\ a(comptime T: type)
+ \\}
+ , &[_]Error{
+ .ExpectedToken,
+ });
+}
+
 test "zig fmt: top-level fields" {
 try testCanonical(
 \\a: did_you_know,
-- 
cgit v1.2.3


From 440189a04ae4baa4a20114fe1d30f0eb585bacc4 Mon Sep 17 00:00:00 2001
From: Vexu 
Date: Fri, 15 May 2020 11:06:34 +0300
Subject: cleanup

* remove unnecessary error
* properly handle ReturnType.Invalid in ast.zig functions
* assert that the tree is clean in render.zig
* simplify parser recovery with top level decls
---
 lib/std/zig/ast.zig | 8 ++------
 lib/std/zig/parse.zig | 38 +++++++++-----------------------------
 lib/std/zig/parser_test.zig | 5 ++---
 lib/std/zig/render.zig | 3 +++
 4 files changed, 16 insertions(+), 38 deletions(-)

(limited to 'lib')

diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig
index 351162bec0..b1441d5b25 100644
--- a/lib/std/zig/ast.zig
+++ b/lib/std/zig/ast.zig
@@ -166,7 +166,6 @@ pub const Error = union(enum) {
 ExpectedDerefOrUnwrap: ExpectedDerefOrUnwrap,
 ExpectedSuffixOp: ExpectedSuffixOp,
DeclBetweenFields: DeclBetweenFields, - MissingComma: MissingComma, InvalidAnd: InvalidAnd, pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void { @@ -217,7 +216,6 @@ pub const Error = union(enum) { .ExpectedDerefOrUnwrap => |*x| return x.render(tokens, stream), .ExpectedSuffixOp => |*x| return x.render(tokens, stream), .DeclBetweenFields => |*x| return x.render(tokens, stream), - .MissingComma => |*x| return x.render(tokens, stream), .InvalidAnd => |*x| return x.render(tokens, stream), } } @@ -270,7 +268,6 @@ pub const Error = union(enum) { .ExpectedDerefOrUnwrap => |x| return x.token, .ExpectedSuffixOp => |x| return x.token, .DeclBetweenFields => |x| return x.token, - .MissingComma => |x| return x.token, .InvalidAnd => |x| return x.token, } } @@ -318,7 +315,6 @@ pub const Error = union(enum) { pub const ExtraVolatileQualifier = SimpleError("Extra volatile qualifier"); pub const ExtraAllowZeroQualifier = SimpleError("Extra allowzero qualifier"); pub const DeclBetweenFields = SimpleError("Declarations are not allowed between container fields"); - pub const MissingComma = SimpleError("Expected comma between items"); pub const InvalidAnd = SimpleError("`&&` is invalid. Note that `and` is boolean AND."); pub const ExpectedCall = struct { @@ -926,7 +922,7 @@ pub const Node = struct { if (i < 1) return node; i -= 1; }, - .Invalid => unreachable, + .Invalid => {}, } if (self.body_node) |body_node| { @@ -948,7 +944,7 @@ pub const Node = struct { if (self.body_node) |body_node| return body_node.lastToken(); switch (self.return_type) { .Explicit, .InferErrorSet => |node| return node.lastToken(), - .Invalid => unreachable, + .Invalid => |tok| return tok, } } }; diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 74945b1b33..a269dc616c 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -78,7 +78,7 @@ fn parseRoot(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error /// / KEYWORD_pub? ContainerField COMMA ContainerMembers /// / KEYWORD_pub? ContainerField /// / -fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!Node.Root.DeclList { +fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !Node.Root.DeclList { var list = Node.Root.DeclList.init(arena); var field_state: union(enum) { @@ -136,7 +136,7 @@ fn parseContainerMembers(arena: *Allocator, it: *TokenIterator, tree: *Tree) All if (parseTopLevelDecl(arena, it, tree) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, error.ParseError => { - // try again + findNextContainerMember(it); continue; }, }) |node| { @@ -324,7 +324,7 @@ fn findNextStmt(it: *TokenIterator) void { } /// Eat a multiline container doc comment -fn parseContainerDocComments(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!?*Node { +fn parseContainerDocComments(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { var lines = Node.DocComment.LineList.init(arena); while (eatToken(it, .ContainerDocComment)) |line| { try lines.push(line); @@ -384,7 +384,7 @@ fn parseTopLevelComptime(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?* /// <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / (KEYWORD_inline / KEYWORD_noinline))? FnProto (SEMICOLON / Block) /// / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? 
VarDecl /// / KEYWORD_usingnamespace Expr SEMICOLON -fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!?*Node { +fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { var lib_name: ?*Node = null; const extern_export_inline_token = blk: { if (eatToken(it, .Keyword_export)) |token| break :blk token; @@ -397,13 +397,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!? break :blk null; }; - if (parseFnProto(arena, it, tree) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.ParseError => { - findNextContainerMember(it); - return error.ParseError; - }, - }) |node| { + if (try parseFnProto(arena, it, tree)) |node| { const fn_node = node.cast(Node.FnProto).?; fn_node.*.extern_export_inline_token = extern_export_inline_token; fn_node.*.lib_name = lib_name; @@ -413,7 +407,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!? // since parseBlock only return error.ParseError on // a missing '}' we can assume this function was // supposed to end here. - error.ParseError => null, + error.ParseError => return node, }) |body_node| { fn_node.body_node = body_node; return node; @@ -437,14 +431,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!? const thread_local_token = eatToken(it, .Keyword_threadlocal); - if (parseVarDecl(arena, it, tree) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.ParseError => { - // try to skip to next decl - findNextContainerMember(it); - return error.ParseError; - }, - }) |node| { + if (try parseVarDecl(arena, it, tree)) |node| { var var_decl = node.cast(Node.VarDecl).?; var_decl.*.thread_local_token = thread_local_token; var_decl.*.comptime_token = null; @@ -469,14 +456,7 @@ fn parseTopLevelDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) Error!? return error.ParseError; } - return parseUse(arena, it, tree) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.ParseError => { - // try to skip to next decl - findNextContainerMember(it); - return error.ParseError; - }, - }; + return try parseUse(arena, it, tree); } /// FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? LinkSection? EXCLAMATIONMARK? 
(KEYWORD_var / TypeExpr) @@ -2926,7 +2906,7 @@ fn ListParseFn(comptime L: type, comptime nodeParseFn: var) ParseFn(L) { // this is likely just a missing comma, // continue parsing this list and give an error try tree.errors.push(.{ - .MissingComma = .{ .token = it.index }, + .ExpectedToken = .{ .token = it.index, .expected_id = .Comma }, }); }, } diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index f92cc5a222..6adc44a5b7 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -32,8 +32,8 @@ test "recovery: missing comma" { \\ } \\} , &[_]Error{ - .MissingComma, - .MissingComma, + .ExpectedToken, + .ExpectedToken, .InvalidAnd, .InvalidToken, }); @@ -2983,7 +2983,6 @@ test "zig fmt: extern without container keyword returns error" { , &[_]Error{ .ExpectedExpr, .ExpectedVarDeclOrFn, - .ExpectedContainerMembers, }); } diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig index 64f2f77f6c..9ca6d4450c 100644 --- a/lib/std/zig/render.zig +++ b/lib/std/zig/render.zig @@ -13,6 +13,9 @@ pub const Error = error{ /// Returns whether anything changed pub fn render(allocator: *mem.Allocator, stream: var, tree: *ast.Tree) (@TypeOf(stream).Error || Error)!bool { + // cannot render an invalid tree + std.debug.assert(tree.errors.len == 0); + // make a passthrough stream that checks whether something changed const MyStream = struct { const MyStream = @This(); -- cgit v1.2.3