author    Andrew Kelley <andrew@ziglang.org>    2019-10-09 13:25:41 -0400
committer GitHub <noreply@github.com>           2019-10-09 13:25:41 -0400
commit    406b70aa56b9a95e768c321dd3caf164add1b49a (patch)
tree      dc89654bce360751073bd712fe51b8c023900636 /lib/std
parent    f929a58d5f69c26c25ced89f31f60d0a92ffc46a (diff)
parent    ae7392e504e7765b05d98636cc249cbf92233f5c (diff)
Merge pull request #3390 from nrdmn/unicode_character_literals
unicode character literals
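
This change teaches the tokenizer to accept multi-byte UTF-8 encoded code points inside character literals. As an illustration only (the line below is not part of the commit), a literal such as the following now lexes as a single CharLiteral token instead of Invalid:

    const c = '💩';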
Diffstat (limited to 'lib/std')
-rw-r--r--  lib/std/zig/tokenizer.zig  41
1 file changed, 35 insertions(+), 6 deletions(-)
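
The diff below classifies the leading byte of a UTF-8 sequence directly in the tokenizer's state machine: 0b110xxxxx starts a 2-byte sequence (one continuation byte), 0b1110xxxx a 3-byte sequence (two continuation bytes), 0b11110xxx a 4-byte sequence (three continuation bytes), while 0x80..0xbf and 0xf8..0xff are rejected as leading bytes. As a minimal standalone sketch of that classification, using a hypothetical helper name that does not appear in the commit:

    // Hypothetical helper, for illustration only: maps a UTF-8 leading byte
    // to the number of continuation bytes (0x80..0xbf) that must follow it,
    // mirroring the ranges handled in the tokenizer below.
    fn utf8ContinuationCount(leading: u8) ?usize {
        return switch (leading) {
            0xc0...0xdf => 1, // 110xxxxx
            0xe0...0xef => 2, // 1110xxxx
            0xf0...0xf7 => 3, // 11110xxx
            else => null, // not a valid start of a multi-byte sequence
        };
    }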
diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index b0a6cd1122..872893a607 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -371,6 +371,7 @@ pub const Tokenizer = struct {
CharLiteralUnicodeEscapeSawU,
CharLiteralUnicodeEscape,
CharLiteralUnicodeInvalid,
+ CharLiteralUnicode,
CharLiteralEnd,
Backslash,
Equal,
@@ -427,6 +428,7 @@ pub const Tokenizer = struct {
.end = undefined,
};
var seen_escape_digits: usize = undefined;
+ var remaining_code_units: usize = undefined;
while (self.index < self.buffer.len) : (self.index += 1) {
const c = self.buffer[self.index];
switch (state) {
@@ -774,16 +776,23 @@ pub const Tokenizer = struct {
'\\' => {
state = State.CharLiteralBackslash;
},
- '\'' => {
+ '\'', 0x80...0xbf, 0xf8...0xff => {
result.id = Token.Id.Invalid;
break;
},
+ 0xc0...0xdf => { // 110xxxxx
+ remaining_code_units = 1;
+ state = State.CharLiteralUnicode;
+ },
+ 0xe0...0xef => { // 1110xxxx
+ remaining_code_units = 2;
+ state = State.CharLiteralUnicode;
+ },
+ 0xf0...0xf7 => { // 11110xxx
+ remaining_code_units = 3;
+ state = State.CharLiteralUnicode;
+ },
else => {
- if (c < 0x20 or c == 0x7f) {
- result.id = Token.Id.Invalid;
- break;
- }
-
state = State.CharLiteralEnd;
},
},
@@ -867,6 +876,19 @@ pub const Tokenizer = struct {
},
},
+ State.CharLiteralUnicode => switch (c) {
+ 0x80...0xbf => {
+ remaining_code_units -= 1;
+ if (remaining_code_units == 0) {
+ state = State.CharLiteralEnd;
+ }
+ },
+ else => {
+ result.id = Token.Id.Invalid;
+ break;
+ },
+ },
+
State.MultilineStringLiteralLine => switch (c) {
'\n' => {
self.index += 1;
@@ -1220,6 +1242,7 @@ pub const Tokenizer = struct {
State.CharLiteralUnicodeEscape,
State.CharLiteralUnicodeInvalid,
State.CharLiteralEnd,
+ State.CharLiteralUnicode,
State.StringLiteralBackslash,
State.LBracketStar,
State.LBracketStarC,
@@ -1428,6 +1451,12 @@ test "tokenizer - char literal with unicode escapes" {
, [_]Token.Id{ .Invalid, .IntegerLiteral, .Invalid });
}
+test "tokenizer - char literal with unicode code point" {
+ testTokenize(
+ \\'💩'
+ , [_]Token.Id{.CharLiteral});
+}
+
test "tokenizer - float literal e exponent" {
testTokenize("a = 4.94065645841246544177e-324;\n", [_]Token.Id{
Token.Id.Identifier,