path: root/src/tokenizer.cpp
author    Andrea Orru <andrea@orru.io>    2018-08-06 01:43:19 -0400
committer Andrea Orru <andrea@orru.io>    2018-08-06 01:43:19 -0400
commit    d2f5e57b68da0b16e5789ca19045ccbcb4ecfa8d (patch)
tree      e9fa3caec533a0d1e2b434868b2fde1f9240e5c8 /src/tokenizer.cpp
parent    06614b3fa09954464c2e2f32756cacedc178a282 (diff)
parent    63a23e848a62d5f167f8d5478de9766cb24aa6eb (diff)
Merge branch 'master' into zen_stdlib
Diffstat (limited to 'src/tokenizer.cpp')
-rw-r--r--   src/tokenizer.cpp   98
1 file changed, 59 insertions, 39 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index 365b35cdfd..1d3db5567a 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -134,6 +134,7 @@ static const struct ZigKeyword zig_keywords[] = {
{"noalias", TokenIdKeywordNoAlias},
{"null", TokenIdKeywordNull},
{"or", TokenIdKeywordOr},
+ {"orelse", TokenIdKeywordOrElse},
{"packed", TokenIdKeywordPacked},
{"promise", TokenIdKeywordPromise},
{"pub", TokenIdKeywordPub},
@@ -215,10 +216,11 @@ enum TokenizeState {
TokenizeStateSawGreaterThanGreaterThan,
TokenizeStateSawDot,
TokenizeStateSawDotDot,
- TokenizeStateSawQuestionMark,
TokenizeStateSawAtSign,
TokenizeStateCharCode,
TokenizeStateError,
+ TokenizeStateLBracket,
+ TokenizeStateLBracketStar,
};
@@ -355,12 +357,19 @@ static void end_float_token(Tokenize *t) {
// Mask the sign bit to 0 since lexed float literals are always non-negative
const uint64_t exp_mask = 0xffffull << exp_shift;
- if (shift >= 64) {
+ // shift == 0 and shift == 128 must be special-cased: the branches below would otherwise shift a 64-bit word by 64 bits, which is undefined behavior
+ if (shift == 128) {
+ f_bits.repr[0] = 0;
+ f_bits.repr[1] = sig_bits[0];
+ } else if (shift == 0) {
+ f_bits.repr[0] = sig_bits[0];
+ f_bits.repr[1] = sig_bits[1];
+ } else if (shift >= 64) {
f_bits.repr[0] = 0;
f_bits.repr[1] = sig_bits[0] << (shift - 64);
} else {
f_bits.repr[0] = sig_bits[0] << shift;
- f_bits.repr[1] = ((sig_bits[1] << shift) | (sig_bits[0] >> (64 - shift)));
+ f_bits.repr[1] = (sig_bits[1] << shift) | (sig_bits[0] >> (64 - shift));
}
f_bits.repr[1] &= ~exp_mask;
@@ -451,16 +460,21 @@ static const char* get_escape_shorthand(uint8_t c) {
static void invalid_char_error(Tokenize *t, uint8_t c) {
if (c == '\r') {
tokenize_error(t, "invalid carriage return, only '\\n' line endings are supported");
- } else if (isprint(c)) {
+ return;
+ }
+
+ const char *sh = get_escape_shorthand(c);
+ if (sh) {
+ tokenize_error(t, "invalid character: '%s'", sh);
+ return;
+ }
+
+ if (isprint(c)) {
tokenize_error(t, "invalid character: '%c'", c);
- } else {
- const char *sh = get_escape_shorthand(c);
- if (sh) {
- tokenize_error(t, "invalid character: '%s'", sh);
- } else {
- tokenize_error(t, "invalid character: '\\x%x'", c);
- }
+ return;
}
+
+ tokenize_error(t, "invalid character: '\\x%02x'", c);
}
void tokenize(Buf *buf, Tokenization *out) {
@@ -530,6 +544,10 @@ void tokenize(Buf *buf, Tokenization *out) {
begin_token(&t, TokenIdComma);
end_token(&t);
break;
+ case '?':
+ begin_token(&t, TokenIdQuestion);
+ end_token(&t);
+ break;
case '{':
begin_token(&t, TokenIdLBrace);
end_token(&t);
@@ -539,8 +557,8 @@ void tokenize(Buf *buf, Tokenization *out) {
end_token(&t);
break;
case '[':
+ t.state = TokenizeStateLBracket;
begin_token(&t, TokenIdLBracket);
- end_token(&t);
break;
case ']':
begin_token(&t, TokenIdRBracket);
@@ -622,33 +640,10 @@ void tokenize(Buf *buf, Tokenization *out) {
begin_token(&t, TokenIdDot);
t.state = TokenizeStateSawDot;
break;
- case '?':
- begin_token(&t, TokenIdMaybe);
- t.state = TokenizeStateSawQuestionMark;
- break;
default:
invalid_char_error(&t, c);
}
break;
- case TokenizeStateSawQuestionMark:
- switch (c) {
- case '?':
- set_token_id(&t, t.cur_tok, TokenIdDoubleQuestion);
- end_token(&t);
- t.state = TokenizeStateStart;
- break;
- case '=':
- set_token_id(&t, t.cur_tok, TokenIdMaybeAssign);
- end_token(&t);
- t.state = TokenizeStateStart;
- break;
- default:
- t.pos -= 1;
- end_token(&t);
- t.state = TokenizeStateStart;
- continue;
- }
- break;
case TokenizeStateSawDot:
switch (c) {
case '.':
@@ -852,6 +847,30 @@ void tokenize(Buf *buf, Tokenization *out) {
continue;
}
break;
+ case TokenizeStateLBracket:
+ switch (c) {
+ case '*':
+ t.state = TokenizeStateLBracketStar;
+ set_token_id(&t, t.cur_tok, TokenIdBracketStarBracket);
+ break;
+ default:
+ // reinterpret as just an lbracket
+ t.pos -= 1;
+ end_token(&t);
+ t.state = TokenizeStateStart;
+ continue;
+ }
+ break;
+ case TokenizeStateLBracketStar:
+ switch (c) {
+ case ']':
+ end_token(&t);
+ t.state = TokenizeStateStart;
+ break;
+ default:
+ invalid_char_error(&t, c);
+ }
+ break;
case TokenizeStateSawPlusPercent:
switch (c) {
case '=':
@@ -1459,7 +1478,6 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateSawGreaterThan:
case TokenizeStateSawGreaterThanGreaterThan:
case TokenizeStateSawDot:
- case TokenizeStateSawQuestionMark:
case TokenizeStateSawAtSign:
case TokenizeStateSawStarPercent:
case TokenizeStateSawPlusPercent:
@@ -1467,12 +1485,14 @@ void tokenize(Buf *buf, Tokenization *out) {
case TokenizeStateLineString:
case TokenizeStateLineStringEnd:
case TokenizeStateSawBarBar:
+ case TokenizeStateLBracket:
end_token(&t);
break;
case TokenizeStateSawDotDot:
case TokenizeStateSawBackslash:
case TokenizeStateLineStringContinue:
case TokenizeStateLineStringContinueC:
+ case TokenizeStateLBracketStar:
tokenize_error(&t, "unexpected EOF");
break;
case TokenizeStateLineComment:
@@ -1509,6 +1529,7 @@ const char * token_name(TokenId id) {
case TokenIdBitShiftRight: return ">>";
case TokenIdBitShiftRightEq: return ">>=";
case TokenIdBitXorEq: return "^=";
+ case TokenIdBracketStarBracket: return "[*]";
case TokenIdCharLiteral: return "CharLiteral";
case TokenIdCmpEq: return "==";
case TokenIdCmpGreaterOrEq: return ">=";
@@ -1521,7 +1542,6 @@ const char * token_name(TokenId id) {
case TokenIdDash: return "-";
case TokenIdDivEq: return "/=";
case TokenIdDot: return ".";
- case TokenIdDoubleQuestion: return "??";
case TokenIdEllipsis2: return "..";
case TokenIdEllipsis3: return "...";
case TokenIdEof: return "EOF";
@@ -1558,6 +1578,7 @@ const char * token_name(TokenId id) {
case TokenIdKeywordNoAlias: return "noalias";
case TokenIdKeywordNull: return "null";
case TokenIdKeywordOr: return "or";
+ case TokenIdKeywordOrElse: return "orelse";
case TokenIdKeywordPacked: return "packed";
case TokenIdKeywordPromise: return "promise";
case TokenIdKeywordPub: return "pub";
@@ -1580,8 +1601,7 @@ const char * token_name(TokenId id) {
case TokenIdLBrace: return "{";
case TokenIdLBracket: return "[";
case TokenIdLParen: return "(";
- case TokenIdMaybe: return "?";
- case TokenIdMaybeAssign: return "?=";
+ case TokenIdQuestion: return "?";
case TokenIdMinusEq: return "-=";
case TokenIdMinusPercent: return "-%";
case TokenIdMinusPercentEq: return "-%=";