character literals: allow unicode escapes

Also make the documentation for character literals clearer. Closes #2089. See #2097.
author: Andrew Kelley <andrew@ziglang.org> 2019-03-23 17:35:21 -0400
committer: Andrew Kelley <andrew@ziglang.org> 2019-03-23 17:35:21 -0400
commit: 89953ec83d8afe4fed0fc9e3cdded09c7522bf86 (patch)
tree: 42f76e6da37b08e0022af060545c5e5d3f6bd476 /src/tokenizer.cpp
parent: 55cb9ef138c7cf0a23e7f852a82884612a3ca663 (diff)
download: zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.tar.gz
zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.zip
1 file changed, 8 insertions, 9 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index dc9d61aa22..7d41343e3a 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -1103,11 +1103,15 @@ void tokenize(Buf *buf, Tokenization *out) {
 
                     if (t.char_code_index >= t.char_code_end) {
                         if (t.unicode) {
-                            if (t.char_code <= 0x7f) {
+                            if (t.char_code > 0x10ffff) {
+                                tokenize_error(&t, "unicode value out of range: %x", t.char_code);
+                            }
+                            if (t.cur_tok->id == TokenIdCharLiteral) {
+                                t.cur_tok->data.char_lit.c = t.char_code;
+                                t.state = TokenizeStateCharLiteralEnd;
+                            } else if (t.char_code <= 0x7f) {
                                 // 00000000 00000000 00000000 0xxxxxxx
                                 handle_string_escape(&t, (uint8_t)t.char_code);
-                            } else if (t.cur_tok->id == TokenIdCharLiteral) {
-                                tokenize_error(&t, "unicode value too large for character literal: %x", t.char_code);
                             } else if (t.char_code <= 0x7ff) {
                                 // 00000000 00000000 00000xxx xx000000
                                 handle_string_escape(&t, (uint8_t)(0xc0 | (t.char_code >> 6)));
@@ -1129,14 +1133,9 @@ void tokenize(Buf *buf, Tokenization *out) {
                                 handle_string_escape(&t, (uint8_t)(0x80 | ((t.char_code >> 6) & 0x3f)));
                                 // 00000000 00000000 00000000 00xxxxxx
                                 handle_string_escape(&t, (uint8_t)(0x80 | (t.char_code & 0x3f)));
-                            } else {
-                                tokenize_error(&t, "unicode value out of range: %x", t.char_code);
                             }
                         } else {
-                            if (t.cur_tok->id == TokenIdCharLiteral && t.char_code > UINT8_MAX) {
-                                tokenize_error(&t, "value too large for character literal: '%x'",
-                                        t.char_code);
-                            }
+                            assert(t.char_code <= 255);
                             handle_string_escape(&t, (uint8_t)t.char_code);
                         }
                     }
author	Andrew Kelley <andrew@ziglang.org>	2019-03-23 17:35:21 -0400
committer	Andrew Kelley <andrew@ziglang.org>	2019-03-23 17:35:21 -0400
commit	89953ec83d8afe4fed0fc9e3cdded09c7522bf86 (patch)
tree	42f76e6da37b08e0022af060545c5e5d3f6bd476 /src/tokenizer.cpp
parent	55cb9ef138c7cf0a23e7f852a82884612a3ca663 (diff)
download	zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.tar.gz zig-89953ec83d8afe4fed0fc9e3cdded09c7522bf86.zip