about | summary | refs | log | tree | commit | diff
path: root/lib/std
diff options
context:
space:
mode:
author Nick Erdmann <n@nirf.de> 2019-10-06 19:52:35 +0200
committer Nick Erdmann <n@nirf.de> 2019-10-07 08:18:16 +0200
commit ae7392e504e7765b05d98636cc249cbf92233f5c (patch)
tree 42fa9e83943c41a4e84e0152b1deb427dfbdc292 /lib/std
parent 571123465b2e030b7b9cf42732ed30f77192fbcd (diff)
download zig-ae7392e504e7765b05d98636cc249cbf92233f5c.tar.gz
zig-ae7392e504e7765b05d98636cc249cbf92233f5c.zip
unicode character literals
Diffstat (limited to 'lib/std')
-rw-r--r-- lib/std/zig/tokenizer.zig 41
1 files changed, 35 insertions, 6 deletions
diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index b0a6cd1122..872893a607 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -371,6 +371,7 @@ pub const Tokenizer = struct {
CharLiteralUnicodeEscapeSawU,
CharLiteralUnicodeEscape,
CharLiteralUnicodeInvalid,
+ CharLiteralUnicode,
CharLiteralEnd,
Backslash,
Equal,
@@ -427,6 +428,7 @@ pub const Tokenizer = struct {
.end = undefined,
};
var seen_escape_digits: usize = undefined;
+ var remaining_code_units: usize = undefined;
while (self.index < self.buffer.len) : (self.index += 1) {
const c = self.buffer[self.index];
switch (state) {
@@ -774,16 +776,23 @@ pub const Tokenizer = struct {
'\\' => {
state = State.CharLiteralBackslash;
},
- '\'' => {
+ '\'', 0x80...0xbf, 0xf8...0xff => {
result.id = Token.Id.Invalid;
break;
},
+ 0xc0...0xdf => { // 110xxxxx
+ remaining_code_units = 1;
+ state = State.CharLiteralUnicode;
+ },
+ 0xe0...0xef => { // 1110xxxx
+ remaining_code_units = 2;
+ state = State.CharLiteralUnicode;
+ },
+ 0xf0...0xf7 => { // 11110xxx
+ remaining_code_units = 3;
+ state = State.CharLiteralUnicode;
+ },
else => {
- if (c < 0x20 or c == 0x7f) {
- result.id = Token.Id.Invalid;
- break;
- }
-
state = State.CharLiteralEnd;
},
},
@@ -867,6 +876,19 @@ pub const Tokenizer = struct {
},
},
+ State.CharLiteralUnicode => switch (c) {
+ 0x80...0xbf => {
+ remaining_code_units -= 1;
+ if (remaining_code_units == 0) {
+ state = State.CharLiteralEnd;
+ }
+ },
+ else => {
+ result.id = Token.Id.Invalid;
+ break;
+ },
+ },
+
State.MultilineStringLiteralLine => switch (c) {
'\n' => {
self.index += 1;
@@ -1220,6 +1242,7 @@ pub const Tokenizer = struct {
State.CharLiteralUnicodeEscape,
State.CharLiteralUnicodeInvalid,
State.CharLiteralEnd,
+ State.CharLiteralUnicode,
State.StringLiteralBackslash,
State.LBracketStar,
State.LBracketStarC,
@@ -1428,6 +1451,12 @@ test "tokenizer - char literal with unicode escapes" {
, [_]Token.Id{ .Invalid, .IntegerLiteral, .Invalid });
}
+test "tokenizer - char literal with unicode code point" { // raw (unescaped) UTF-8 in '...': U+1F4A9 is 4 bytes, F0 9F 92 A9
+ testTokenize(
+ \\'💩'
+ , [_]Token.Id{.CharLiteral}); // the whole quoted sequence must come back as one CharLiteral token
+}
+
test "tokenizer - float literal e exponent" {
testTokenize("a = 4.94065645841246544177e-324;\n", [_]Token.Id{
Token.Id.Identifier,