From 3ba916584db5485c38ebf2390e8d22bc6d81bf8e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 4 Nov 2022 18:47:19 -0700 Subject: actually remove stage1 --- src/stage1/tokenizer.cpp | 1626 ---------------------------------------------- 1 file changed, 1626 deletions(-) delete mode 100644 src/stage1/tokenizer.cpp (limited to 'src/stage1/tokenizer.cpp') diff --git a/src/stage1/tokenizer.cpp b/src/stage1/tokenizer.cpp deleted file mode 100644 index 820c73be5c..0000000000 --- a/src/stage1/tokenizer.cpp +++ /dev/null @@ -1,1626 +0,0 @@ -/* - * Copyright (c) 2015 Andrew Kelley - * - * This file is part of zig, which is MIT licensed. - * See http://opensource.org/licenses/MIT - */ - -#include "tokenizer.hpp" -#include "util.hpp" - -#include -#include -#include -#include -#include -#include - -#define WHITESPACE \ - ' ': \ - case '\r': \ - case '\n' - -#define DIGIT_NON_ZERO \ - '1': \ - case '2': \ - case '3': \ - case '4': \ - case '5': \ - case '6': \ - case '7': \ - case '8': \ - case '9' - -#define DIGIT \ - '0': \ - case DIGIT_NON_ZERO - -#define HEXDIGIT \ - 'a': \ - case 'b': \ - case 'c': \ - case 'd': \ - case 'e': \ - case 'f': \ - case 'A': \ - case 'B': \ - case 'C': \ - case 'D': \ - case 'E': \ - case 'F': \ - case DIGIT - -#define ALPHA_EXCEPT_HEX_P_O_X \ - 'g': \ - case 'h': \ - case 'i': \ - case 'j': \ - case 'k': \ - case 'l': \ - case 'm': \ - case 'n': \ - case 'q': \ - case 'r': \ - case 's': \ - case 't': \ - case 'u': \ - case 'v': \ - case 'w': \ - case 'y': \ - case 'z': \ - case 'G': \ - case 'H': \ - case 'I': \ - case 'J': \ - case 'K': \ - case 'L': \ - case 'M': \ - case 'N': \ - case 'O': \ - case 'Q': \ - case 'R': \ - case 'S': \ - case 'T': \ - case 'U': \ - case 'V': \ - case 'W': \ - case 'X': \ - case 'Y': \ - case 'Z' - -#define ALPHA_EXCEPT_E_B_O_X \ - ALPHA_EXCEPT_HEX_P_O_X: \ - case 'a': \ - case 'c': \ - case 'd': \ - case 'f': \ - case 'A': \ - case 'B': \ - case 'C': \ - case 'D': \ - case 'F': \ - case 'p': \ - case 'P' - -#define ALPHA_EXCEPT_HEX_AND_P \ - ALPHA_EXCEPT_HEX_P_O_X: \ - case 'o': \ - case 'x' - -#define ALPHA_EXCEPT_E \ - ALPHA_EXCEPT_HEX_AND_P: \ - case 'a': \ - case 'b': \ - case 'c': \ - case 'd': \ - case 'f': \ - case 'A': \ - case 'B': \ - case 'C': \ - case 'D': \ - case 'F': \ - case 'p': \ - case 'P' - -#define ALPHA \ - ALPHA_EXCEPT_E: \ - case 'e': \ - case 'E' - -#define IDENTIFIER_CHAR \ - ALPHA: \ - case DIGIT: \ - case '_' - -#define SYMBOL_START \ - ALPHA: \ - case '_' - -struct ZigKeyword { - const char *text; - TokenId token_id; -}; - -static const struct ZigKeyword zig_keywords[] = { - {"align", TokenIdKeywordAlign}, - {"allowzero", TokenIdKeywordAllowZero}, - {"and", TokenIdKeywordAnd}, - {"anyframe", TokenIdKeywordAnyFrame}, - {"anytype", TokenIdKeywordAnyType}, - {"asm", TokenIdKeywordAsm}, - {"async", TokenIdKeywordAsync}, - {"await", TokenIdKeywordAwait}, - {"break", TokenIdKeywordBreak}, - {"callconv", TokenIdKeywordCallconv}, - {"catch", TokenIdKeywordCatch}, - {"comptime", TokenIdKeywordCompTime}, - {"const", TokenIdKeywordConst}, - {"continue", TokenIdKeywordContinue}, - {"defer", TokenIdKeywordDefer}, - {"else", TokenIdKeywordElse}, - {"enum", TokenIdKeywordEnum}, - {"errdefer", TokenIdKeywordErrdefer}, - {"error", TokenIdKeywordError}, - {"export", TokenIdKeywordExport}, - {"extern", TokenIdKeywordExtern}, - {"fn", TokenIdKeywordFn}, - {"for", TokenIdKeywordFor}, - {"if", TokenIdKeywordIf}, - {"inline", TokenIdKeywordInline}, - {"noalias", TokenIdKeywordNoAlias}, - {"noinline", TokenIdKeywordNoInline}, - {"nosuspend", TokenIdKeywordNoSuspend}, - {"opaque", TokenIdKeywordOpaque}, - {"or", TokenIdKeywordOr}, - {"orelse", TokenIdKeywordOrElse}, - {"packed", TokenIdKeywordPacked}, - {"pub", TokenIdKeywordPub}, - {"resume", TokenIdKeywordResume}, - {"return", TokenIdKeywordReturn}, - {"linksection", TokenIdKeywordLinkSection}, - {"struct", TokenIdKeywordStruct}, - {"suspend", TokenIdKeywordSuspend}, - {"switch", TokenIdKeywordSwitch}, - {"test", TokenIdKeywordTest}, - {"threadlocal", TokenIdKeywordThreadLocal}, - {"try", TokenIdKeywordTry}, - {"union", TokenIdKeywordUnion}, - {"unreachable", TokenIdKeywordUnreachable}, - {"usingnamespace", TokenIdKeywordUsingNamespace}, - {"var", TokenIdKeywordVar}, - {"volatile", TokenIdKeywordVolatile}, - {"while", TokenIdKeywordWhile}, -}; - -// Returns TokenIdIdentifier if it is not a keyword. -static TokenId zig_keyword_token(const char *name_ptr, size_t name_len) { - for (size_t i = 0; i < array_length(zig_keywords); i += 1) { - if (mem_eql_str(name_ptr, name_len, zig_keywords[i].text)) { - return zig_keywords[i].token_id; - } - } - return TokenIdIdentifier; -} - -enum TokenizeState { - TokenizeState_start, - TokenizeState_identifier, - TokenizeState_builtin, - TokenizeState_string_literal, - TokenizeState_string_literal_backslash, - TokenizeState_multiline_string_literal_line, - TokenizeState_char_literal, - TokenizeState_char_literal_backslash, - TokenizeState_char_literal_hex_escape, - TokenizeState_char_literal_unicode_escape_saw_u, - TokenizeState_char_literal_unicode_escape, - TokenizeState_char_literal_unicode, - TokenizeState_char_literal_end, - TokenizeState_backslash, - TokenizeState_equal, - TokenizeState_bang, - TokenizeState_pipe, - TokenizeState_minus, - TokenizeState_minus_percent, - TokenizeState_minus_pipe, - TokenizeState_asterisk, - TokenizeState_asterisk_percent, - TokenizeState_asterisk_pipe, - TokenizeState_slash, - TokenizeState_line_comment_start, - TokenizeState_line_comment, - TokenizeState_doc_comment_start, - TokenizeState_doc_comment, - TokenizeState_container_doc_comment, - TokenizeState_zero, - TokenizeState_int_literal_dec, - TokenizeState_int_literal_dec_no_underscore, - TokenizeState_int_literal_bin, - TokenizeState_int_literal_bin_no_underscore, - TokenizeState_int_literal_oct, - TokenizeState_int_literal_oct_no_underscore, - TokenizeState_int_literal_hex, - TokenizeState_int_literal_hex_no_underscore, - TokenizeState_num_dot_dec, - TokenizeState_num_dot_hex, - TokenizeState_float_fraction_dec, - TokenizeState_float_fraction_dec_no_underscore, - TokenizeState_float_fraction_hex, - TokenizeState_float_fraction_hex_no_underscore, - TokenizeState_float_exponent_unsigned, - TokenizeState_float_exponent_num, - TokenizeState_float_exponent_num_no_underscore, - TokenizeState_ampersand, - TokenizeState_caret, - TokenizeState_percent, - TokenizeState_plus, - TokenizeState_plus_percent, - TokenizeState_plus_pipe, - TokenizeState_angle_bracket_left, - TokenizeState_angle_bracket_angle_bracket_left, - TokenizeState_angle_bracket_angle_bracket_left_pipe, - TokenizeState_angle_bracket_right, - TokenizeState_angle_bracket_angle_bracket_right, - TokenizeState_period, - TokenizeState_period_2, - TokenizeState_period_asterisk, - TokenizeState_saw_at_sign, - TokenizeState_error, -}; - - -struct Tokenize { - Tokenization *out; - size_t pos; - TokenizeState state; - uint32_t line; - uint32_t column; -}; - -ATTRIBUTE_PRINTF(2, 3) -static void tokenize_error(Tokenize *t, const char *format, ...) { - t->state = TokenizeState_error; - - t->out->err_byte_offset = t->pos; - - va_list ap; - va_start(ap, format); - t->out->err = buf_vprintf(format, ap); - va_end(ap); -} - -static void begin_token(Tokenize *t, TokenId id) { - t->out->ids.append(id); - TokenLoc tok_loc; - tok_loc.offset = (uint32_t) t->pos; - tok_loc.line = t->line; - tok_loc.column = t->column; - t->out->locs.append(tok_loc); -} - -static void cancel_token(Tokenize *t) { - t->out->ids.pop(); - t->out->locs.pop(); -} - -static const char* get_escape_shorthand(uint8_t c) { - switch (c) { - case '\0': - return "\\0"; - case '\a': - return "\\a"; - case '\b': - return "\\b"; - case '\t': - return "\\t"; - case '\n': - return "\\n"; - case '\v': - return "\\v"; - case '\f': - return "\\f"; - case '\r': - return "\\r"; - default: - return nullptr; - } -} - -static void invalid_eof(Tokenize *t) { - return tokenize_error(t, "unexpected End-Of-File"); -} - -static void invalid_char_error(Tokenize *t, uint8_t c) { - if (c == 0) { - return invalid_eof(t); - } - - if (c == '\r') { - tokenize_error(t, "invalid carriage return, only '\\n' line endings are supported"); - return; - } - - const char *sh = get_escape_shorthand(c); - if (sh) { - tokenize_error(t, "invalid character: '%s'", sh); - return; - } - - if (isprint(c)) { - tokenize_error(t, "invalid character: '%c'", c); - return; - } - - tokenize_error(t, "invalid character: '\\x%02x'", c); -} - -void tokenize(const char *source, Tokenization *out) { - Tokenize t = {0}; - t.out = out; - - size_t remaining_code_units; - size_t seen_escape_digits; - - // Skip the UTF-8 BOM if present. - if (source[0] == (char)0xef && - source[1] == (char)0xbb && - source[2] == (char)0xbf) - { - t.pos += 3; - } - - // Invalid token takes up index 0 so that index 0 can mean "none". - begin_token(&t, TokenIdCount); - - for (;;) { - uint8_t c = source[t.pos]; - switch (t.state) { - case TokenizeState_error: - goto eof; - case TokenizeState_start: - switch (c) { - case 0: - goto eof; - case WHITESPACE: - break; - case '"': - begin_token(&t, TokenIdStringLiteral); - t.state = TokenizeState_string_literal; - break; - case '\'': - begin_token(&t, TokenIdCharLiteral); - t.state = TokenizeState_char_literal; - break; - case ALPHA: - case '_': - t.state = TokenizeState_identifier; - begin_token(&t, TokenIdIdentifier); - break; - case '@': - begin_token(&t, TokenIdBuiltin); - t.state = TokenizeState_saw_at_sign; - break; - case '=': - begin_token(&t, TokenIdEq); - t.state = TokenizeState_equal; - break; - case '!': - begin_token(&t, TokenIdBang); - t.state = TokenizeState_bang; - break; - case '|': - begin_token(&t, TokenIdBinOr); - t.state = TokenizeState_pipe; - break; - case '(': - begin_token(&t, TokenIdLParen); - break; - case ')': - begin_token(&t, TokenIdRParen); - break; - case '[': - begin_token(&t, TokenIdLBracket); - break; - case ']': - begin_token(&t, TokenIdRBracket); - break; - case ';': - begin_token(&t, TokenIdSemicolon); - break; - case ',': - begin_token(&t, TokenIdComma); - break; - case '?': - begin_token(&t, TokenIdQuestion); - break; - case ':': - begin_token(&t, TokenIdColon); - break; - case '%': - begin_token(&t, TokenIdPercent); - t.state = TokenizeState_percent; - break; - case '*': - begin_token(&t, TokenIdStar); - t.state = TokenizeState_asterisk; - break; - case '+': - begin_token(&t, TokenIdPlus); - t.state = TokenizeState_plus; - break; - case '<': - begin_token(&t, TokenIdCmpLessThan); - t.state = TokenizeState_angle_bracket_left; - break; - case '>': - begin_token(&t, TokenIdCmpGreaterThan); - t.state = TokenizeState_angle_bracket_right; - break; - case '^': - begin_token(&t, TokenIdBinXor); - t.state = TokenizeState_caret; - break; - case '\\': - begin_token(&t, TokenIdMultilineStringLiteralLine); - t.state = TokenizeState_backslash; - break; - case '{': - begin_token(&t, TokenIdLBrace); - break; - case '}': - begin_token(&t, TokenIdRBrace); - break; - case '~': - begin_token(&t, TokenIdTilde); - break; - case '.': - begin_token(&t, TokenIdDot); - t.state = TokenizeState_period; - break; - case '-': - begin_token(&t, TokenIdDash); - t.state = TokenizeState_minus; - break; - case '/': - begin_token(&t, TokenIdSlash); - t.state = TokenizeState_slash; - break; - case '&': - begin_token(&t, TokenIdAmpersand); - t.state = TokenizeState_ampersand; - break; - case '0': - t.state = TokenizeState_zero; - begin_token(&t, TokenIdIntLiteral); - break; - case DIGIT_NON_ZERO: - t.state = TokenizeState_int_literal_dec; - begin_token(&t, TokenIdIntLiteral); - break; - default: - invalid_char_error(&t, c); - } - break; - case TokenizeState_saw_at_sign: - switch (c) { - case 0: - invalid_eof(&t); - goto eof; - case '"': - t.out->ids.last() = TokenIdIdentifier; - t.state = TokenizeState_string_literal; - break; - case IDENTIFIER_CHAR: - t.state = TokenizeState_builtin; - break; - default: - invalid_char_error(&t, c); - } - break; - case TokenizeState_ampersand: - switch (c) { - case 0: - goto eof; - case '&': - tokenize_error(&t, "`&&` is invalid. Note that `and` is boolean AND"); - break; - case '=': - t.out->ids.last() = TokenIdBitAndEq; - t.state = TokenizeState_start; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_asterisk: - switch (c) { - case 0: - goto eof; - case '=': - t.out->ids.last() = TokenIdTimesEq; - t.state = TokenizeState_start; - break; - case '*': - t.out->ids.last() = TokenIdStarStar; - t.state = TokenizeState_start; - break; - case '%': - t.state = TokenizeState_asterisk_percent; - break; - case '|': - t.state = TokenizeState_asterisk_pipe; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_asterisk_percent: - switch (c) { - case 0: - t.out->ids.last() = TokenIdTimesPercent; - goto eof; - case '=': - t.out->ids.last() = TokenIdTimesPercentEq; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdTimesPercent; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_asterisk_pipe: - switch (c) { - case 0: - t.out->ids.last() = TokenIdTimesPipe; - goto eof; - case '=': - t.out->ids.last() = TokenIdTimesPipeEq; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdTimesPipe; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_percent: - switch (c) { - case 0: - goto eof; - case '=': - t.out->ids.last() = TokenIdModEq; - t.state = TokenizeState_start; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_plus: - switch (c) { - case 0: - goto eof; - case '=': - t.out->ids.last() = TokenIdPlusEq; - t.state = TokenizeState_start; - break; - case '+': - t.out->ids.last() = TokenIdPlusPlus; - t.state = TokenizeState_start; - break; - case '%': - t.state = TokenizeState_plus_percent; - break; - case '|': - t.state = TokenizeState_plus_pipe; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_plus_percent: - switch (c) { - case 0: - t.out->ids.last() = TokenIdPlusPercent; - goto eof; - case '=': - t.out->ids.last() = TokenIdPlusPercentEq; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdPlusPercent; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_plus_pipe: - switch (c) { - case 0: - t.out->ids.last() = TokenIdPlusPipe; - goto eof; - case '=': - t.out->ids.last() = TokenIdPlusPipeEq; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdPlusPipe; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_caret: - switch (c) { - case 0: - goto eof; - case '=': - t.out->ids.last() = TokenIdBitXorEq; - t.state = TokenizeState_start; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_identifier: - switch (c) { - case 0: { - uint32_t start_pos = t.out->locs.last().offset; - t.out->ids.last() = zig_keyword_token( - source + start_pos, t.pos - start_pos); - goto eof; - } - case IDENTIFIER_CHAR: - break; - default: { - uint32_t start_pos = t.out->locs.last().offset; - t.out->ids.last() = zig_keyword_token( - source + start_pos, t.pos - start_pos); - - t.state = TokenizeState_start; - continue; - } - } - break; - case TokenizeState_builtin: - switch (c) { - case 0: - goto eof; - case IDENTIFIER_CHAR: - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_backslash: - switch (c) { - case '\\': - t.state = TokenizeState_multiline_string_literal_line; - break; - default: - invalid_char_error(&t, c); - break; - } - break; - case TokenizeState_string_literal: - switch (c) { - case 0: - invalid_eof(&t); - goto eof; - case '\\': - t.state = TokenizeState_string_literal_backslash; - break; - case '"': - t.state = TokenizeState_start; - break; - case '\n': - case '\r': - tokenize_error(&t, "newline not allowed in string literal"); - break; - default: - break; - } - break; - case TokenizeState_string_literal_backslash: - switch (c) { - case 0: - invalid_eof(&t); - goto eof; - case '\n': - case '\r': - tokenize_error(&t, "newline not allowed in string literal"); - break; - default: - t.state = TokenizeState_string_literal; - break; - } - break; - case TokenizeState_char_literal: - if (c == 0) { - invalid_eof(&t); - goto eof; - } else if (c == '\\') { - t.state = TokenizeState_char_literal_backslash; - } else if (c == '\'') { - tokenize_error(&t, "expected character"); - } else if ((c >= 0x80 && c <= 0xbf) || c >= 0xf8) { - // 10xxxxxx - // 11111xxx - invalid_char_error(&t, c); - } else if (c >= 0xc0 && c <= 0xdf) { - // 110xxxxx - remaining_code_units = 1; - t.state = TokenizeState_char_literal_unicode; - } else if (c >= 0xe0 && c <= 0xef) { - // 1110xxxx - remaining_code_units = 2; - t.state = TokenizeState_char_literal_unicode; - } else if (c >= 0xf0 && c <= 0xf7) { - // 11110xxx - remaining_code_units = 3; - t.state = TokenizeState_char_literal_unicode; - } else { - t.state = TokenizeState_char_literal_end; - } - break; - case TokenizeState_char_literal_backslash: - switch (c) { - case 0: - invalid_eof(&t); - goto eof; - case '\n': - case '\r': - tokenize_error(&t, "newline not allowed in character literal"); - break; - case 'x': - t.state = TokenizeState_char_literal_hex_escape; - seen_escape_digits = 0; - break; - case 'u': - t.state = TokenizeState_char_literal_unicode_escape_saw_u; - break; - case 'U': - invalid_char_error(&t, c); - break; - default: - t.state = TokenizeState_char_literal_end; - break; - } - break; - case TokenizeState_char_literal_hex_escape: - switch (c) { - case ALPHA: - case DIGIT: - seen_escape_digits += 1; - if (seen_escape_digits == 2) { - t.state = TokenizeState_char_literal_end; - } - break; - default: - tokenize_error(&t, "expected hex digit"); - break; - } - break; - case TokenizeState_char_literal_unicode_escape_saw_u: - switch (c) { - case '{': - t.state = TokenizeState_char_literal_unicode_escape; - seen_escape_digits = 0; - break; - default: - tokenize_error(&t, "expected '{' to begin unicode escape sequence"); - break; - } - break; - case TokenizeState_char_literal_unicode_escape: - switch (c) { - case ALPHA: - case DIGIT: - seen_escape_digits += 1; - break; - case '}': - if (seen_escape_digits == 0) { - tokenize_error(&t, "empty unicode escape sequence"); - break; - } - t.state = TokenizeState_char_literal_end; - break; - default: - tokenize_error(&t, "expected hex digit"); - break; - } - break; - case TokenizeState_char_literal_end: - switch (c) { - case '\'': - t.state = TokenizeState_start; - break; - default: - invalid_char_error(&t, c); - break; - } - break; - case TokenizeState_char_literal_unicode: - if (c >= 0x80 && c <= 0xbf) { - remaining_code_units -= 1; - if (remaining_code_units == 0) { - t.state = TokenizeState_char_literal_end; - } - } else { - invalid_char_error(&t, c); - } - break; - case TokenizeState_multiline_string_literal_line: - switch (c) { - case 0: - goto eof; - case '\n': - t.state = TokenizeState_start; - break; - default: - break; - } - break; - case TokenizeState_bang: - switch (c) { - case 0: - goto eof; - case '=': - t.out->ids.last() = TokenIdCmpNotEq; - t.state = TokenizeState_start; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_pipe: - switch (c) { - case 0: - goto eof; - case '=': - t.out->ids.last() = TokenIdBitOrEq; - t.state = TokenizeState_start; - break; - case '|': - t.out->ids.last() = TokenIdBarBar; - t.state = TokenizeState_start; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_equal: - switch (c) { - case 0: - goto eof; - case '=': - t.out->ids.last() = TokenIdCmpEq; - t.state = TokenizeState_start; - break; - case '>': - t.out->ids.last() = TokenIdFatArrow; - t.state = TokenizeState_start; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_minus: - switch (c) { - case 0: - goto eof; - case '>': - t.out->ids.last() = TokenIdArrow; - t.state = TokenizeState_start; - break; - case '=': - t.out->ids.last() = TokenIdMinusEq; - t.state = TokenizeState_start; - break; - case '%': - t.state = TokenizeState_minus_percent; - break; - case '|': - t.state = TokenizeState_minus_pipe; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_minus_percent: - switch (c) { - case 0: - t.out->ids.last() = TokenIdMinusPercent; - goto eof; - case '=': - t.out->ids.last() = TokenIdMinusPercentEq; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdMinusPercent; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_minus_pipe: - switch (c) { - case 0: - t.out->ids.last() = TokenIdMinusPipe; - goto eof; - case '=': - t.out->ids.last() = TokenIdMinusPipeEq; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdMinusPipe; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_angle_bracket_left: - switch (c) { - case 0: - goto eof; - case '=': - t.out->ids.last() = TokenIdCmpLessOrEq; - t.state = TokenizeState_start; - break; - case '<': - t.state = TokenizeState_angle_bracket_angle_bracket_left; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_angle_bracket_angle_bracket_left: - switch (c) { - case 0: - t.out->ids.last() = TokenIdBitShiftLeft; - goto eof; - case '=': - t.out->ids.last() = TokenIdBitShiftLeftEq; - t.state = TokenizeState_start; - break; - case '|': - t.state = TokenizeState_angle_bracket_angle_bracket_left_pipe; - break; - default: - t.out->ids.last() = TokenIdBitShiftLeft; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_angle_bracket_angle_bracket_left_pipe: - switch (c) { - case 0: - t.out->ids.last() = TokenIdBitShiftLeftPipe; - goto eof; - case '=': - t.out->ids.last() = TokenIdBitShiftLeftPipeEq; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdBitShiftLeftPipe; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_angle_bracket_right: - switch (c) { - case 0: - goto eof; - case '=': - t.out->ids.last() = TokenIdCmpGreaterOrEq; - t.state = TokenizeState_start; - break; - case '>': - t.state = TokenizeState_angle_bracket_angle_bracket_right; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_angle_bracket_angle_bracket_right: - switch (c) { - case 0: - t.out->ids.last() = TokenIdBitShiftRight; - goto eof; - case '=': - t.out->ids.last() = TokenIdBitShiftRightEq; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdBitShiftRight; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_period: - switch (c) { - case 0: - goto eof; - case '.': - t.state = TokenizeState_period_2; - break; - case '*': - t.state = TokenizeState_period_asterisk; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_period_2: - switch (c) { - case 0: - t.out->ids.last() = TokenIdEllipsis2; - goto eof; - case '.': - t.out->ids.last() = TokenIdEllipsis3; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdEllipsis2; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_period_asterisk: - switch (c) { - case 0: - t.out->ids.last() = TokenIdDotStar; - goto eof; - case '*': - tokenize_error(&t, "`.*` cannot be followed by `*`. Are you missing a space?"); - break; - default: - t.out->ids.last() = TokenIdDotStar; - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_slash: - switch (c) { - case 0: - goto eof; - case '/': - t.state = TokenizeState_line_comment_start; - break; - case '=': - t.out->ids.last() = TokenIdDivEq; - t.state = TokenizeState_start; - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_line_comment_start: - switch (c) { - case 0: - goto eof; - case '/': - t.state = TokenizeState_doc_comment_start; - break; - case '!': - t.out->ids.last() = TokenIdContainerDocComment; - t.state = TokenizeState_container_doc_comment; - break; - case '\n': - cancel_token(&t); - t.state = TokenizeState_start; - break; - default: - cancel_token(&t); - t.state = TokenizeState_line_comment; - break; - } - break; - case TokenizeState_doc_comment_start: - switch (c) { - case 0: - t.out->ids.last() = TokenIdDocComment; - goto eof; - case '/': - cancel_token(&t); - t.state = TokenizeState_line_comment; - break; - case '\n': - t.out->ids.last() = TokenIdDocComment; - t.state = TokenizeState_start; - break; - default: - t.out->ids.last() = TokenIdDocComment; - t.state = TokenizeState_doc_comment; - break; - } - break; - case TokenizeState_line_comment: - switch (c) { - case 0: - goto eof; - case '\n': - t.state = TokenizeState_start; - break; - default: - break; - } - break; - case TokenizeState_doc_comment: - case TokenizeState_container_doc_comment: - switch (c) { - case 0: - goto eof; - case '\n': - t.state = TokenizeState_start; - break; - default: - // do nothing - break; - } - break; - case TokenizeState_zero: - switch (c) { - case 0: - goto eof; - case 'b': - t.state = TokenizeState_int_literal_bin_no_underscore; - break; - case 'o': - t.state = TokenizeState_int_literal_oct_no_underscore; - break; - case 'x': - t.state = TokenizeState_int_literal_hex_no_underscore; - break; - case DIGIT: - case '_': - case '.': - case 'e': - case 'E': - // Reinterpret as a decimal number. - t.state = TokenizeState_int_literal_dec; - continue; - case ALPHA_EXCEPT_E_B_O_X: - invalid_char_error(&t, c); - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_int_literal_bin_no_underscore: - switch (c) { - case '0': - case '1': - t.state = TokenizeState_int_literal_bin; - break; - default: - invalid_char_error(&t, c); - } - break; - case TokenizeState_int_literal_bin: - switch (c) { - case 0: - goto eof; - case '_': - t.state = TokenizeState_int_literal_bin_no_underscore; - break; - case '0': - case '1': - break; - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case ALPHA: - invalid_char_error(&t, c); - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_int_literal_oct_no_underscore: - switch (c) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - t.state = TokenizeState_int_literal_oct; - break; - default: - invalid_char_error(&t, c); - break; - } - break; - case TokenizeState_int_literal_oct: - switch (c) { - case 0: - goto eof; - case '_': - t.state = TokenizeState_int_literal_oct_no_underscore; - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - break; - case ALPHA: - case '8': - case '9': - invalid_char_error(&t, c); - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_int_literal_dec_no_underscore: - switch (c) { - case DIGIT: - t.state = TokenizeState_int_literal_dec; - break; - default: - invalid_char_error(&t, c); - break; - } - break; - case TokenizeState_int_literal_dec: - switch (c) { - case 0: - goto eof; - case '_': - t.state = TokenizeState_int_literal_dec_no_underscore; - break; - case '.': - t.state = TokenizeState_num_dot_dec; - t.out->ids.last() = TokenIdFloatLiteral; - break; - case 'e': - case 'E': - t.state = TokenizeState_float_exponent_unsigned; - t.out->ids.last() = TokenIdFloatLiteral; - break; - case DIGIT: - break; - case ALPHA_EXCEPT_E: - invalid_char_error(&t, c); - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_int_literal_hex_no_underscore: - switch (c) { - case HEXDIGIT: - t.state = TokenizeState_int_literal_hex; - break; - default: - invalid_char_error(&t, c); - } - break; - case TokenizeState_int_literal_hex: - switch (c) { - case 0: - goto eof; - case '_': - t.state = TokenizeState_int_literal_hex_no_underscore; - break; - case '.': - t.state = TokenizeState_num_dot_hex; - t.out->ids.last() = TokenIdFloatLiteral; - break; - case 'p': - case 'P': - t.state = TokenizeState_float_exponent_unsigned; - t.out->ids.last() = TokenIdFloatLiteral; - break; - case HEXDIGIT: - break; - case ALPHA_EXCEPT_HEX_AND_P: - invalid_char_error(&t, c); - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_num_dot_dec: - switch (c) { - case 0: - goto eof; - case '.': - t.out->ids.last() = TokenIdIntLiteral; - t.pos -= 1; - t.column -= 1; - t.state = TokenizeState_start; - continue; - case DIGIT: - t.state = TokenizeState_float_fraction_dec; - break; - default: - invalid_char_error(&t, c); - break; - } - break; - case TokenizeState_num_dot_hex: - switch (c) { - case 0: - goto eof; - case '.': - t.out->ids.last() = TokenIdIntLiteral; - t.pos -= 1; - t.column -= 1; - t.state = TokenizeState_start; - continue; - case HEXDIGIT: - t.out->ids.last() = TokenIdFloatLiteral; - t.state = TokenizeState_float_fraction_hex; - break; - default: - invalid_char_error(&t, c); - break; - } - break; - case TokenizeState_float_fraction_dec_no_underscore: - switch (c) { - case DIGIT: - t.state = TokenizeState_float_fraction_dec; - break; - default: - invalid_char_error(&t, c); - } - break; - case TokenizeState_float_fraction_dec: - switch (c) { - case 0: - goto eof; - case '_': - t.state = TokenizeState_float_fraction_dec_no_underscore; - break; - case 'e': - case 'E': - t.state = TokenizeState_float_exponent_unsigned; - break; - case DIGIT: - break; - case ALPHA_EXCEPT_E: - invalid_char_error(&t, c); - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_float_fraction_hex_no_underscore: - switch (c) { - case HEXDIGIT: - t.state = TokenizeState_float_fraction_hex; - break; - default: - invalid_char_error(&t, c); - } - break; - case TokenizeState_float_fraction_hex: - switch (c) { - case 0: - goto eof; - case '_': - t.state = TokenizeState_float_fraction_hex_no_underscore; - break; - case 'p': - case 'P': - t.state = TokenizeState_float_exponent_unsigned; - break; - case HEXDIGIT: - break; - case ALPHA_EXCEPT_HEX_AND_P: - invalid_char_error(&t, c); - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - case TokenizeState_float_exponent_unsigned: - switch (c) { - case '+': - case '-': - t.state = TokenizeState_float_exponent_num_no_underscore; - break; - default: - // Reinterpret as a normal exponent number. - t.state = TokenizeState_float_exponent_num_no_underscore; - continue; - } - break; - case TokenizeState_float_exponent_num_no_underscore: - switch (c) { - case DIGIT: - t.state = TokenizeState_float_exponent_num; - break; - default: - invalid_char_error(&t, c); - } - break; - case TokenizeState_float_exponent_num: - switch (c) { - case 0: - goto eof; - case '_': - t.state = TokenizeState_float_exponent_num_no_underscore; - break; - case DIGIT: - break; - case ALPHA: - invalid_char_error(&t, c); - break; - default: - t.state = TokenizeState_start; - continue; - } - break; - } - t.pos += 1; - if (c == '\n') { - t.line += 1; - t.column = 0; - } else { - t.column += 1; - } - } -eof:; - - begin_token(&t, TokenIdEof); -} - -const char * token_name(TokenId id) { - switch (id) { - case TokenIdAmpersand: return "&"; - case TokenIdArrow: return "->"; - case TokenIdBang: return "!"; - case TokenIdBarBar: return "||"; - case TokenIdBinOr: return "|"; - case TokenIdBinXor: return "^"; - case TokenIdBitAndEq: return "&="; - case TokenIdBitOrEq: return "|="; - case TokenIdBitShiftLeft: return "<<"; - case TokenIdBitShiftLeftEq: return "<<="; - case TokenIdBitShiftLeftPipe: return "<<|"; - case TokenIdBitShiftLeftPipeEq: return "<<|="; - case TokenIdBitShiftRight: return ">>"; - case TokenIdBitShiftRightEq: return ">>="; - case TokenIdBitXorEq: return "^="; - case TokenIdCharLiteral: return "CharLiteral"; - case TokenIdCmpEq: return "=="; - case TokenIdCmpGreaterOrEq: return ">="; - case TokenIdCmpGreaterThan: return ">"; - case TokenIdCmpLessOrEq: return "<="; - case TokenIdCmpLessThan: return "<"; - case TokenIdCmpNotEq: return "!="; - case TokenIdColon: return ":"; - case TokenIdComma: return ","; - case TokenIdDash: return "-"; - case TokenIdDivEq: return "/="; - case TokenIdDocComment: return "DocComment"; - case TokenIdContainerDocComment: return "ContainerDocComment"; - case TokenIdDot: return "."; - case TokenIdDotStar: return ".*"; - case TokenIdEllipsis2: return ".."; - case TokenIdEllipsis3: return "..."; - case TokenIdEof: return "EOF"; - case TokenIdEq: return "="; - case TokenIdFatArrow: return "=>"; - case TokenIdFloatLiteral: return "FloatLiteral"; - case TokenIdIntLiteral: return "IntLiteral"; - case TokenIdKeywordAsync: return "async"; - case TokenIdKeywordAllowZero: return "allowzero"; - case TokenIdKeywordAwait: return "await"; - case TokenIdKeywordResume: return "resume"; - case TokenIdKeywordSuspend: return "suspend"; - case TokenIdKeywordAlign: return "align"; - case TokenIdKeywordAnd: return "and"; - case TokenIdKeywordAnyFrame: return "anyframe"; - case TokenIdKeywordAnyType: return "anytype"; - case TokenIdKeywordAsm: return "asm"; - case TokenIdKeywordBreak: return "break"; - case TokenIdKeywordCatch: return "catch"; - case TokenIdKeywordCallconv: return "callconv"; - case TokenIdKeywordCompTime: return "comptime"; - case TokenIdKeywordConst: return "const"; - case TokenIdKeywordContinue: return "continue"; - case TokenIdKeywordDefer: return "defer"; - case TokenIdKeywordElse: return "else"; - case TokenIdKeywordEnum: return "enum"; - case TokenIdKeywordErrdefer: return "errdefer"; - case TokenIdKeywordError: return "error"; - case TokenIdKeywordExport: return "export"; - case TokenIdKeywordExtern: return "extern"; - case TokenIdKeywordFn: return "fn"; - case TokenIdKeywordFor: return "for"; - case TokenIdKeywordIf: return "if"; - case TokenIdKeywordInline: return "inline"; - case TokenIdKeywordNoAlias: return "noalias"; - case TokenIdKeywordNoInline: return "noinline"; - case TokenIdKeywordNoSuspend: return "nosuspend"; - case TokenIdKeywordOpaque: return "opaque"; - case TokenIdKeywordOr: return "or"; - case TokenIdKeywordOrElse: return "orelse"; - case TokenIdKeywordPacked: return "packed"; - case TokenIdKeywordPub: return "pub"; - case TokenIdKeywordReturn: return "return"; - case TokenIdKeywordLinkSection: return "linksection"; - case TokenIdKeywordStruct: return "struct"; - case TokenIdKeywordSwitch: return "switch"; - case TokenIdKeywordTest: return "test"; - case TokenIdKeywordThreadLocal: return "threadlocal"; - case TokenIdKeywordTry: return "try"; - case TokenIdKeywordUnion: return "union"; - case TokenIdKeywordUnreachable: return "unreachable"; - case TokenIdKeywordUsingNamespace: return "usingnamespace"; - case TokenIdKeywordVar: return "var"; - case TokenIdKeywordVolatile: return "volatile"; - case TokenIdKeywordWhile: return "while"; - case TokenIdLBrace: return "{"; - case TokenIdLBracket: return "["; - case TokenIdLParen: return "("; - case TokenIdQuestion: return "?"; - case TokenIdMinusEq: return "-="; - case TokenIdMinusPercent: return "-%"; - case TokenIdMinusPercentEq: return "-%="; - case TokenIdMinusPipe: return "-|"; - case TokenIdMinusPipeEq: return "-|="; - case TokenIdModEq: return "%="; - case TokenIdPercent: return "%"; - case TokenIdPlus: return "+"; - case TokenIdPlusEq: return "+="; - case TokenIdPlusPercent: return "+%"; - case TokenIdPlusPercentEq: return "+%="; - case TokenIdPlusPipe: return "+|"; - case TokenIdPlusPipeEq: return "+|="; - case TokenIdPlusPlus: return "++"; - case TokenIdRBrace: return "}"; - case TokenIdRBracket: return "]"; - case TokenIdRParen: return ")"; - case TokenIdSemicolon: return ";"; - case TokenIdSlash: return "/"; - case TokenIdStar: return "*"; - case TokenIdStarStar: return "**"; - case TokenIdStringLiteral: return "StringLiteral"; - case TokenIdMultilineStringLiteralLine: return "MultilineStringLiteralLine"; - case TokenIdIdentifier: return "Identifier"; - case TokenIdTilde: return "~"; - case TokenIdTimesEq: return "*="; - case TokenIdTimesPercent: return "*%"; - case TokenIdTimesPercentEq: return "*%="; - case TokenIdTimesPipe: return "*|"; - case TokenIdTimesPipeEq: return "*|="; - case TokenIdBuiltin: return "Builtin"; - case TokenIdCount: - zig_unreachable(); - } - return "(invalid token)"; -} -- cgit v1.2.3