diff options
| author | Andrea Orru <andrea@orru.io> | 2018-08-06 01:43:19 -0400 |
|---|---|---|
| committer | Andrea Orru <andrea@orru.io> | 2018-08-06 01:43:19 -0400 |
| commit | d2f5e57b68da0b16e5789ca19045ccbcb4ecfa8d (patch) | |
| tree | e9fa3caec533a0d1e2b434868b2fde1f9240e5c8 /src/tokenizer.cpp | |
| parent | 06614b3fa09954464c2e2f32756cacedc178a282 (diff) | |
| parent | 63a23e848a62d5f167f8d5478de9766cb24aa6eb (diff) | |
| download | zig-d2f5e57b68da0b16e5789ca19045ccbcb4ecfa8d.tar.gz zig-d2f5e57b68da0b16e5789ca19045ccbcb4ecfa8d.zip | |
Merge branch 'master' into zen_stdlib
Diffstat (limited to 'src/tokenizer.cpp')
| -rw-r--r-- | src/tokenizer.cpp | 98 |
1 files changed, 59 insertions, 39 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 365b35cdfd..1d3db5567a 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -134,6 +134,7 @@ static const struct ZigKeyword zig_keywords[] = { {"noalias", TokenIdKeywordNoAlias}, {"null", TokenIdKeywordNull}, {"or", TokenIdKeywordOr}, + {"orelse", TokenIdKeywordOrElse}, {"packed", TokenIdKeywordPacked}, {"promise", TokenIdKeywordPromise}, {"pub", TokenIdKeywordPub}, @@ -215,10 +216,11 @@ enum TokenizeState { TokenizeStateSawGreaterThanGreaterThan, TokenizeStateSawDot, TokenizeStateSawDotDot, - TokenizeStateSawQuestionMark, TokenizeStateSawAtSign, TokenizeStateCharCode, TokenizeStateError, + TokenizeStateLBracket, + TokenizeStateLBracketStar, }; @@ -355,12 +357,19 @@ static void end_float_token(Tokenize *t) { // Mask the sign bit to 0 since always non-negative lex const uint64_t exp_mask = 0xffffull << exp_shift; - if (shift >= 64) { + // must be special-cased to avoid undefined behavior on shift == 64 + if (shift == 128) { + f_bits.repr[0] = 0; + f_bits.repr[1] = sig_bits[0]; + } else if (shift == 0) { + f_bits.repr[0] = sig_bits[0]; + f_bits.repr[1] = sig_bits[1]; + } else if (shift >= 64) { f_bits.repr[0] = 0; f_bits.repr[1] = sig_bits[0] << (shift - 64); } else { f_bits.repr[0] = sig_bits[0] << shift; - f_bits.repr[1] = ((sig_bits[1] << shift) | (sig_bits[0] >> (64 - shift))); + f_bits.repr[1] = (sig_bits[1] << shift) | (sig_bits[0] >> (64 - shift)); } f_bits.repr[1] &= ~exp_mask; @@ -451,16 +460,21 @@ static const char* get_escape_shorthand(uint8_t c) { static void invalid_char_error(Tokenize *t, uint8_t c) { if (c == '\r') { tokenize_error(t, "invalid carriage return, only '\\n' line endings are supported"); - } else if (isprint(c)) { + return; + } + + const char *sh = get_escape_shorthand(c); + if (sh) { + tokenize_error(t, "invalid character: '%s'", sh); + return; + } + + if (isprint(c)) { tokenize_error(t, "invalid character: '%c'", c); - } else { - const char *sh = get_escape_shorthand(c); - if (sh) { - tokenize_error(t, "invalid character: '%s'", sh); - } else { - tokenize_error(t, "invalid character: '\\x%x'", c); - } + return; } + + tokenize_error(t, "invalid character: '\\x%02x'", c); } void tokenize(Buf *buf, Tokenization *out) { @@ -530,6 +544,10 @@ void tokenize(Buf *buf, Tokenization *out) { begin_token(&t, TokenIdComma); end_token(&t); break; + case '?': + begin_token(&t, TokenIdQuestion); + end_token(&t); + break; case '{': begin_token(&t, TokenIdLBrace); end_token(&t); @@ -539,8 +557,8 @@ void tokenize(Buf *buf, Tokenization *out) { end_token(&t); break; case '[': + t.state = TokenizeStateLBracket; begin_token(&t, TokenIdLBracket); - end_token(&t); break; case ']': begin_token(&t, TokenIdRBracket); @@ -622,33 +640,10 @@ void tokenize(Buf *buf, Tokenization *out) { begin_token(&t, TokenIdDot); t.state = TokenizeStateSawDot; break; - case '?': - begin_token(&t, TokenIdMaybe); - t.state = TokenizeStateSawQuestionMark; - break; default: invalid_char_error(&t, c); } break; - case TokenizeStateSawQuestionMark: - switch (c) { - case '?': - set_token_id(&t, t.cur_tok, TokenIdDoubleQuestion); - end_token(&t); - t.state = TokenizeStateStart; - break; - case '=': - set_token_id(&t, t.cur_tok, TokenIdMaybeAssign); - end_token(&t); - t.state = TokenizeStateStart; - break; - default: - t.pos -= 1; - end_token(&t); - t.state = TokenizeStateStart; - continue; - } - break; case TokenizeStateSawDot: switch (c) { case '.': @@ -852,6 +847,30 @@ void tokenize(Buf *buf, Tokenization *out) { continue; } break; + case TokenizeStateLBracket: + switch (c) { + case '*': + t.state = TokenizeStateLBracketStar; + set_token_id(&t, t.cur_tok, TokenIdBracketStarBracket); + break; + default: + // reinterpret as just an lbracket + t.pos -= 1; + end_token(&t); + t.state = TokenizeStateStart; + continue; + } + break; + case TokenizeStateLBracketStar: + switch (c) { + case ']': + end_token(&t); + t.state = TokenizeStateStart; + break; + default: + invalid_char_error(&t, c); + } + break; case TokenizeStateSawPlusPercent: switch (c) { case '=': @@ -1459,7 +1478,6 @@ void tokenize(Buf *buf, Tokenization *out) { case TokenizeStateSawGreaterThan: case TokenizeStateSawGreaterThanGreaterThan: case TokenizeStateSawDot: - case TokenizeStateSawQuestionMark: case TokenizeStateSawAtSign: case TokenizeStateSawStarPercent: case TokenizeStateSawPlusPercent: @@ -1467,12 +1485,14 @@ void tokenize(Buf *buf, Tokenization *out) { case TokenizeStateLineString: case TokenizeStateLineStringEnd: case TokenizeStateSawBarBar: + case TokenizeStateLBracket: end_token(&t); break; case TokenizeStateSawDotDot: case TokenizeStateSawBackslash: case TokenizeStateLineStringContinue: case TokenizeStateLineStringContinueC: + case TokenizeStateLBracketStar: tokenize_error(&t, "unexpected EOF"); break; case TokenizeStateLineComment: @@ -1509,6 +1529,7 @@ const char * token_name(TokenId id) { case TokenIdBitShiftRight: return ">>"; case TokenIdBitShiftRightEq: return ">>="; case TokenIdBitXorEq: return "^="; + case TokenIdBracketStarBracket: return "[*]"; case TokenIdCharLiteral: return "CharLiteral"; case TokenIdCmpEq: return "=="; case TokenIdCmpGreaterOrEq: return ">="; @@ -1521,7 +1542,6 @@ const char * token_name(TokenId id) { case TokenIdDash: return "-"; case TokenIdDivEq: return "/="; case TokenIdDot: return "."; - case TokenIdDoubleQuestion: return "??"; case TokenIdEllipsis2: return ".."; case TokenIdEllipsis3: return "..."; case TokenIdEof: return "EOF"; @@ -1558,6 +1578,7 @@ const char * token_name(TokenId id) { case TokenIdKeywordNoAlias: return "noalias"; case TokenIdKeywordNull: return "null"; case TokenIdKeywordOr: return "or"; + case TokenIdKeywordOrElse: return "orelse"; case TokenIdKeywordPacked: return "packed"; case TokenIdKeywordPromise: return "promise"; case TokenIdKeywordPub: return "pub"; @@ -1580,8 +1601,7 @@ const char * token_name(TokenId id) { case TokenIdLBrace: return "{"; case TokenIdLBracket: return "["; case TokenIdLParen: return "("; - case TokenIdMaybe: return "?"; - case TokenIdMaybeAssign: return "?="; + case TokenIdQuestion: return "?"; case TokenIdMinusEq: return "-="; case TokenIdMinusPercent: return "-%"; case TokenIdMinusPercentEq: return "-%="; |
