aboutsummaryrefslogtreecommitdiff
path: root/src/tokenizer.cpp
diff options
context:
space:
mode:
author: Andrew Kelley <andrew@ziglang.org> 2019-03-23 17:35:21 -0400
committer: Andrew Kelley <andrew@ziglang.org> 2019-03-23 17:35:21 -0400
commit: 89953ec83d8afe4fed0fc9e3cdded09c7522bf86 (patch)
tree: 42f76e6da37b08e0022af060545c5e5d3f6bd476 /src/tokenizer.cpp
parent: 55cb9ef138c7cf0a23e7f852a82884612a3ca663 (diff)
download: zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.tar.gz
download: zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.zip
character literals: allow unicode escapes
Also make the documentation for character literals clearer. Closes #2089. See #2097.
Diffstat (limited to 'src/tokenizer.cpp')
-rw-r--r-- src/tokenizer.cpp 17
1 file changed, 8 insertions, 9 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index dc9d61aa22..7d41343e3a 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -1103,11 +1103,15 @@ void tokenize(Buf *buf, Tokenization *out) {
if (t.char_code_index >= t.char_code_end) {
if (t.unicode) {
- if (t.char_code <= 0x7f) {
+ if (t.char_code > 0x10ffff) {
+ tokenize_error(&t, "unicode value out of range: %x", t.char_code);
+ }
+ if (t.cur_tok->id == TokenIdCharLiteral) {
+ t.cur_tok->data.char_lit.c = t.char_code;
+ t.state = TokenizeStateCharLiteralEnd;
+ } else if (t.char_code <= 0x7f) {
// 00000000 00000000 00000000 0xxxxxxx
handle_string_escape(&t, (uint8_t)t.char_code);
- } else if (t.cur_tok->id == TokenIdCharLiteral) {
- tokenize_error(&t, "unicode value too large for character literal: %x", t.char_code);
} else if (t.char_code <= 0x7ff) {
// 00000000 00000000 00000xxx xx000000
handle_string_escape(&t, (uint8_t)(0xc0 | (t.char_code >> 6)));
@@ -1129,14 +1133,9 @@ void tokenize(Buf *buf, Tokenization *out) {
handle_string_escape(&t, (uint8_t)(0x80 | ((t.char_code >> 6) & 0x3f)));
// 00000000 00000000 00000000 00xxxxxx
handle_string_escape(&t, (uint8_t)(0x80 | (t.char_code & 0x3f)));
- } else {
- tokenize_error(&t, "unicode value out of range: %x", t.char_code);
}
} else {
- if (t.cur_tok->id == TokenIdCharLiteral && t.char_code > UINT8_MAX) {
- tokenize_error(&t, "value too large for character literal: '%x'",
- t.char_code);
- }
+ assert(t.char_code <= 255);
handle_string_escape(&t, (uint8_t)t.char_code);
}
}