diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2019-03-23 17:35:21 -0400 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2019-03-23 17:35:21 -0400 |
| commit | 89953ec83d8afe4fed0fc9e3cdded09c7522bf86 (patch) | |
| tree | 42f76e6da37b08e0022af060545c5e5d3f6bd476 /src/tokenizer.cpp | |
| parent | 55cb9ef138c7cf0a23e7f852a82884612a3ca663 (diff) | |
| download | zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.tar.gz zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.zip | |
character literals: allow Unicode escapes
Also make the documentation for character literals clearer.
closes #2089
see #2097
Diffstat (limited to 'src/tokenizer.cpp')
| -rw-r--r-- | src/tokenizer.cpp | 17 |
1 file changed, 8 insertions, 9 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index dc9d61aa22..7d41343e3a 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -1103,11 +1103,15 @@ void tokenize(Buf *buf, Tokenization *out) { if (t.char_code_index >= t.char_code_end) { if (t.unicode) { - if (t.char_code <= 0x7f) { + if (t.char_code > 0x10ffff) { + tokenize_error(&t, "unicode value out of range: %x", t.char_code); + } + if (t.cur_tok->id == TokenIdCharLiteral) { + t.cur_tok->data.char_lit.c = t.char_code; + t.state = TokenizeStateCharLiteralEnd; + } else if (t.char_code <= 0x7f) { // 00000000 00000000 00000000 0xxxxxxx handle_string_escape(&t, (uint8_t)t.char_code); - } else if (t.cur_tok->id == TokenIdCharLiteral) { - tokenize_error(&t, "unicode value too large for character literal: %x", t.char_code); } else if (t.char_code <= 0x7ff) { // 00000000 00000000 00000xxx xx000000 handle_string_escape(&t, (uint8_t)(0xc0 | (t.char_code >> 6))); @@ -1129,14 +1133,9 @@ void tokenize(Buf *buf, Tokenization *out) { handle_string_escape(&t, (uint8_t)(0x80 | ((t.char_code >> 6) & 0x3f))); // 00000000 00000000 00000000 00xxxxxx handle_string_escape(&t, (uint8_t)(0x80 | (t.char_code & 0x3f))); - } else { - tokenize_error(&t, "unicode value out of range: %x", t.char_code); } } else { - if (t.cur_tok->id == TokenIdCharLiteral && t.char_code > UINT8_MAX) { - tokenize_error(&t, "value too large for character literal: '%x'", - t.char_code); - } + assert(t.char_code <= 255); handle_string_escape(&t, (uint8_t)t.char_code); } } |
