about summary refs log tree commit diff
path: root/std
diff options
context:
space:
mode:
Diffstat (limited to 'std')
-rw-r--r-- std/zig/tokenizer.zig 11
1 file changed, 10 insertions, 1 deletion
diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index 19fb233567..f25da12a91 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -222,9 +222,11 @@ pub const Tokenizer = struct {
},
};
} else {
+ // Skip the UTF-8 BOM if present
+ const src_start = if (mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else usize(0);
return Tokenizer{
.buffer = buffer,
- .index = 0,
+ .index = src_start,
.pending_invalid_token = null,
};
}
@@ -1455,6 +1457,13 @@ test "tokenizer - line comment followed by identifier" {
});
}
+test "tokenizer - UTF-8 BOM is recognized and skipped" {
+ testTokenize("\xEF\xBB\xBFa;\n", [_]Token.Id{
+ Token.Id.Identifier,
+ Token.Id.Semicolon,
+ });
+}
+
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
var tokenizer = Tokenizer.init(source);
for (expected_tokens) |expected_token_id| {