diff options
| author | LemonBoy <thatlemon@gmail.com> | 2019-09-11 11:46:51 +0200 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2019-09-11 15:20:18 -0400 |
| commit | f36b8fd7b2c89d46dba95eca05b60487638dd2a0 (patch) | |
| tree | bd9f2164f99d43a0ce8a3b1e01e7ac9592718f78 /src/tokenizer.cpp | |
| parent | 0eddee449d8a09a999b352d769b98379865e8dbc (diff) | |
| download | zig-f36b8fd7b2c89d46dba95eca05b60487638dd2a0.tar.gz zig-f36b8fd7b2c89d46dba95eca05b60487638dd2a0.zip | |
Recognize & skip the UTF-8 BOM
Diffstat (limited to 'src/tokenizer.cpp')
| -rw-r--r-- | src/tokenizer.cpp | 9 |
1 files changed, 7 insertions, 2 deletions
```diff
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 465f652288..71a24fe726 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -407,9 +407,14 @@ void tokenize(Buf *buf, Tokenization *out) {
     t.buf = buf;
 
     out->line_offsets = allocate<ZigList<size_t>>(1);
-
     out->line_offsets->append(0);
-    for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
+
+    // Skip the UTF-8 BOM if present
+    if (buf_starts_with_mem(buf, "\xEF\xBB\xBF", 3)) {
+        t.pos += 3;
+    }
+
+    for (; t.pos < buf_len(t.buf); t.pos += 1) {
         uint8_t c = buf_ptr(t.buf)[t.pos];
         switch (t.state) {
             case TokenizeStateError:
```
