about summary refs log tree commit diff
path: root/src/tokenizer.cpp
diff options
context:
space:
mode:
author LemonBoy <thatlemon@gmail.com> 2019-09-11 11:46:51 +0200
committer Andrew Kelley <andrew@ziglang.org> 2019-09-11 15:20:18 -0400
commit f36b8fd7b2c89d46dba95eca05b60487638dd2a0 (patch)
tree bd9f2164f99d43a0ce8a3b1e01e7ac9592718f78 /src/tokenizer.cpp
parent 0eddee449d8a09a999b352d769b98379865e8dbc (diff)
download zig-f36b8fd7b2c89d46dba95eca05b60487638dd2a0.tar.gz
zig-f36b8fd7b2c89d46dba95eca05b60487638dd2a0.zip
Recognize & skip the UTF-8 BOM
Diffstat (limited to 'src/tokenizer.cpp')
-rw-r--r-- src/tokenizer.cpp 9
1 files changed, 7 insertions, 2 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 465f652288..71a24fe726 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -407,9 +407,14 @@ void tokenize(Buf *buf, Tokenization *out) {
t.buf = buf;
out->line_offsets = allocate<ZigList<size_t>>(1);
-
out->line_offsets->append(0);
- for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
+
+ // Skip the UTF-8 BOM if present
+ if (buf_starts_with_mem(buf, "\xEF\xBB\xBF", 3)) {
+ t.pos += 3;
+ }
+
+ for (; t.pos < buf_len(t.buf); t.pos += 1) {
uint8_t c = buf_ptr(t.buf)[t.pos];
switch (t.state) {
case TokenizeStateError: