diff options
| author | Emile Badenhorst <MyvarHD@gmail.com> | 2023-07-03 17:47:08 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-07-03 17:47:08 +0200 |
| commit | 116a99d3cef91046bf132b6fcecbd2708f912896 (patch) | |
| tree | e86f4008a64b87bc0868a277bd962b74c177ac9d /lib/docs/ziglexer.js | |
| parent | de227ace14e09c7c17a60f716b88d2327fee201d (diff) | |
| download | zig-116a99d3cef91046bf132b6fcecbd2708f912896.tar.gz zig-116a99d3cef91046bf132b6fcecbd2708f912896.zip | |
Autodoc: JavaScript port of the Zig lexer. (#16306)
* Most of the JS lexer ported
* Fixed lexical ranges
* Mini test framework
* More testing and bug fixing in the Zig JS lexer
* Removed bad import
* Implemented good HTML generation
Closes #15863
Diffstat (limited to 'lib/docs/ziglexer.js')
| -rw-r--r-- | lib/docs/ziglexer.js | 2109 |
1 file changed, 2109 insertions, 0 deletions
diff --git a/lib/docs/ziglexer.js b/lib/docs/ziglexer.js new file mode 100644 index 0000000000..ef85c1e431 --- /dev/null +++ b/lib/docs/ziglexer.js @@ -0,0 +1,2109 @@ +const Tag = { + invalid: "invalid", + identifier: "identifier", + string_literal: "string_literal", + multiline_string_literal_line: "multiline_string_literal_line", + char_literal: "char_literal", + eof: "eof", + builtin: "builtin", + number_literal: "number_literal", + doc_comment: "doc_comment", + container_doc_comment: "container_doc_comment", + invalid_periodasterisks: "invalid_periodasterisks", + bang: "bang", + pipe: "pipe", + pipe_pipe: "pipe_pipe", + pipe_equal: "pipe_equal", + equal: "equal", + equal_equal: "equal_equal", + equal_angle_bracket_right: "equal_angle_bracket_right", + bang_equal: "bang_equal", + l_paren: "l_paren", + r_paren: "r_paren", + semicolon: "semicolon", + percent: "percent", + percent_equal: "percent_equal", + l_brace: "l_brace", + r_brace: "r_brace", + l_bracket: "l_bracket", + r_bracket: "r_bracket", + period: "period", + period_asterisk: "period_asterisk", + ellipsis2: "ellipsis2", + ellipsis3: "ellipsis3", + caret: "caret", + caret_equal: "caret_equal", + plus: "plus", + plus_plus: "plus_plus", + plus_equal: "plus_equal", + plus_percent: "plus_percent", + plus_percent_equal: "plus_percent_equal", + plus_pipe: "plus_pipe", + plus_pipe_equal: "plus_pipe_equal", + minus: "minus", + minus_equal: "minus_equal", + minus_percent: "minus_percent", + minus_percent_equal: "minus_percent_equal", + minus_pipe: "minus_pipe", + minus_pipe_equal: "minus_pipe_equal", + asterisk: "asterisk", + asterisk_equal: "asterisk_equal", + asterisk_asterisk: "asterisk_asterisk", + asterisk_percent: "asterisk_percent", + asterisk_percent_equal: "asterisk_percent_equal", + asterisk_pipe: "asterisk_pipe", + asterisk_pipe_equal: "asterisk_pipe_equal", + arrow: "arrow", + colon: "colon", + slash: "slash", + slash_equal: "slash_equal", + comma: "comma", + ampersand: "ampersand", + ampersand_equal: 
"ampersand_equal", + question_mark: "question_mark", + angle_bracket_left: "angle_bracket_left", + angle_bracket_left_equal: "angle_bracket_left_equal", + angle_bracket_angle_bracket_left: "angle_bracket_angle_bracket_left", + angle_bracket_angle_bracket_left_equal: "angle_bracket_angle_bracket_left_equal", + angle_bracket_angle_bracket_left_pipe: "angle_bracket_angle_bracket_left_pipe", + angle_bracket_angle_bracket_left_pipe_equal: "angle_bracket_angle_bracket_left_pipe_equal", + angle_bracket_right: "angle_bracket_right", + angle_bracket_right_equal: "angle_bracket_right_equal", + angle_bracket_angle_bracket_right: "angle_bracket_angle_bracket_right", + angle_bracket_angle_bracket_right_equal: "angle_bracket_angle_bracket_right_equal", + tilde: "tilde", + keyword_addrspace: "keyword_addrspace", + keyword_align: "keyword_align", + keyword_allowzero: "keyword_allowzero", + keyword_and: "keyword_and", + keyword_anyframe: "keyword_anyframe", + keyword_anytype: "keyword_anytype", + keyword_asm: "keyword_asm", + keyword_async: "keyword_async", + keyword_await: "keyword_await", + keyword_break: "keyword_break", + keyword_callconv: "keyword_callconv", + keyword_catch: "keyword_catch", + keyword_comptime: "keyword_comptime", + keyword_const: "keyword_const", + keyword_continue: "keyword_continue", + keyword_defer: "keyword_defer", + keyword_else: "keyword_else", + keyword_enum: "keyword_enum", + keyword_errdefer: "keyword_errdefer", + keyword_error: "keyword_error", + keyword_export: "keyword_export", + keyword_extern: "keyword_extern", + keyword_fn: "keyword_fn", + keyword_for: "keyword_for", + keyword_if: "keyword_if", + keyword_inline: "keyword_inline", + keyword_noalias: "keyword_noalias", + keyword_noinline: "keyword_noinline", + keyword_nosuspend: "keyword_nosuspend", + keyword_opaque: "keyword_opaque", + keyword_or: "keyword_or", + keyword_orelse: "keyword_orelse", + keyword_packed: "keyword_packed", + keyword_pub: "keyword_pub", + keyword_resume: 
"keyword_resume", + keyword_return: "keyword_return", + keyword_linksection: "keyword_linksection", + keyword_struct: "keyword_struct", + keyword_suspend: "keyword_suspend", + keyword_switch: "keyword_switch", + keyword_test: "keyword_test", + keyword_threadlocal: "keyword_threadlocal", + keyword_try: "keyword_try", + keyword_union: "keyword_union", + keyword_unreachable: "keyword_unreachable", + keyword_usingnamespace: "keyword_usingnamespace", + keyword_var: "keyword_var", + keyword_volatile: "keyword_volatile", + keyword_while: "keyword_while" +} + +const State = { + start: 0, + identifier: 1, + builtin: 2, + string_literal: 3, + string_literal_backslash: 4, + multiline_string_literal_line: 5, + char_literal: 6, + char_literal_backslash: 7, + char_literal_hex_escape: 8, + char_literal_unicode_escape_saw_u: 9, + char_literal_unicode_escape: 10, + char_literal_unicode_invalid: 11, + char_literal_unicode: 12, + char_literal_end: 13, + backslash: 14, + equal: 15, + bang: 16, + pipe: 17, + minus: 18, + minus_percent: 19, + minus_pipe: 20, + asterisk: 21, + asterisk_percent: 22, + asterisk_pipe: 23, + slash: 24, + line_comment_start: 25, + line_comment: 26, + doc_comment_start: 27, + doc_comment: 28, + int: 29, + int_exponent: 30, + int_period: 31, + float: 32, + float_exponent: 33, + ampersand: 34, + caret: 35, + percent: 36, + plus: 37, + plus_percent: 38, + plus_pipe: 39, + angle_bracket_left: 40, + angle_bracket_angle_bracket_left: 41, + angle_bracket_angle_bracket_left_pipe: 42, + angle_bracket_right: 43, + angle_bracket_angle_bracket_right: 44, + period: 45, + period_2: 46, + period_asterisk: 47, + saw_at_sign: 48, +} + +const keywords = { + "addrspace": Tag.keyword_addrspace, + "align": Tag.keyword_align, + "allowzero": Tag.keyword_allowzero, + "and": Tag.keyword_and, + "anyframe": Tag.keyword_anyframe, + "anytype": Tag.keyword_anytype, + "asm": Tag.keyword_asm, + "async": Tag.keyword_async, + "await": Tag.keyword_await, + "break": Tag.keyword_break, + 
"callconv": Tag.keyword_callconv, + "catch": Tag.keyword_catch, + "comptime": Tag.keyword_comptime, + "const": Tag.keyword_const, + "continue": Tag.keyword_continue, + "defer": Tag.keyword_defer, + "else": Tag.keyword_else, + "enum": Tag.keyword_enum, + "errdefer": Tag.keyword_errdefer, + "error": Tag.keyword_error, + "export": Tag.keyword_export, + "extern": Tag.keyword_extern, + "fn": Tag.keyword_fn, + "for": Tag.keyword_for, + "if": Tag.keyword_if, + "inline": Tag.keyword_inline, + "noalias": Tag.keyword_noalias, + "noinline": Tag.keyword_noinline, + "nosuspend": Tag.keyword_nosuspend, + "opaque": Tag.keyword_opaque, + "or": Tag.keyword_or, + "orelse": Tag.keyword_orelse, + "packed": Tag.keyword_packed, + "pub": Tag.keyword_pub, + "resume": Tag.keyword_resume, + "return": Tag.keyword_return, + "linksection": Tag.keyword_linksection, + "struct": Tag.keyword_struct, + "suspend": Tag.keyword_suspend, + "switch": Tag.keyword_switch, + "test": Tag.keyword_test, + "threadlocal": Tag.keyword_threadlocal, + "try": Tag.keyword_try, + "union": Tag.keyword_union, + "unreachable": Tag.keyword_unreachable, + "usingnamespace": Tag.keyword_usingnamespace, + "var": Tag.keyword_var, + "volatile": Tag.keyword_volatile, + "while": Tag.keyword_while, +}; + +function make_token(tag, start, end) { + return { + tag: tag, + loc: { + start: start, + end: end + } + } + +} + +function dump_tokens(tokens, raw_source) { + + //TODO: this is not very fast + function find_tag_key(tag) { + for (const [key, value] of Object.entries(Tag)) { + if (value == tag) return key; + } + } + + for (let i = 0; i < tokens.length; i++) { + const tok = tokens[i]; + const z = raw_source.substring(tok.loc.start, tok.loc.end).toLowerCase(); + console.log(`${find_tag_key(tok.tag)} "${tok.tag}" '${z}'`) + } +} + + + +function tokenize_zig_source(raw_source) { + + var index = 0; + var flag = false; + + let seen_escape_digits = undefined; + let remaining_code_units = undefined; + + const next = () => { + let state = 
State.start; + + var result = { + tag: -1, + loc: { + start: index, + end: undefined, + }, + }; + + //having a while (true) loop seems like a bad idea the loop should never + //take more iterations than twice the length of the source code + const MAX_ITERATIONS = raw_source.length * 2; + let iterations = 0; + + while (iterations <= MAX_ITERATIONS) { + + if (flag) { + return make_token(Tag.eof, index - 2, index - 2); + } + iterations += 1; // avoid death loops + + var c = raw_source[index]; + + if (c === undefined) { + c = ' '; // push the last token + flag = true; + } + + switch (state) { + case State.start: + switch (c) { + case 0: { + if (index != raw_source.length) { + result.tag = Tag.invalid; + result.loc.start = index; + index += 1; + result.loc.end = index; + return result; + } + result.loc.end = index; + return result; + } + case ' ': + case '\n': + case '\t': + case '\r': { + result.loc.start = index + 1; + break; + } + case '"': { + state = State.string_literal; + result.tag = Tag.string_literal; + break; + } + case '\'': { + state = State.char_literal; + break; + } + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': { + state = State.identifier; + result.tag = Tag.identifier; + break; + } + case '@': { + state = State.saw_at_sign; + break; + } + case '=': { + state = State.equal; + break; + } + case '!': { + state = State.bang; + break; + } + case '|': { + state = State.pipe; + 
break; + } + case '(': { + result.tag = Tag.l_paren; + index += 1; + result.loc.end = index; + + return result; + + } + case ')': { + result.tag = Tag.r_paren; + index += 1; result.loc.end = index; + return result; + + } + case '[': { + result.tag = Tag.l_bracket; + index += 1; result.loc.end = index; + return result; + + } + case ']': { + result.tag = Tag.r_bracket; + index += 1; result.loc.end = index; + return result; + + } + case ';': { + result.tag = Tag.semicolon; + index += 1; result.loc.end = index; + return result; + + } + case ',': { + result.tag = Tag.comma; + index += 1; result.loc.end = index; + return result; + + } + case '?': { + result.tag = Tag.question_mark; + index += 1; result.loc.end = index; + return result; + + } + case ':': { + result.tag = Tag.colon; + index += 1; result.loc.end = index; + return result; + + } + case '%': { + state = State.percent; break; + } + case '*': { + state = State.asterisk; break; + } + case '+': { + state = State.plus; break; + } + case '<': { + state = State.angle_bracket_left; break; + } + case '>': { + state = State.angle_bracket_right; break; + } + case '^': { + state = State.caret; break; + } + case '\\': { + state = State.backslash; + result.tag = Tag.multiline_string_literal_line; break; + } + case '{': { + result.tag = Tag.l_brace; + index += 1; result.loc.end = index; + return result; + + } + case '}': { + result.tag = Tag.r_brace; + index += 1; result.loc.end = index; + return result; + + } + case '~': { + result.tag = Tag.tilde; + index += 1; result.loc.end = index; + return result; + + } + case '.': { + state = State.period; break; + } + case '-': { + state = State.minus; break; + } + case '/': { + state = State.slash; break; + } + case '&': { + state = State.ampersand; break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + state = State.int; + result.tag = Tag.number_literal; break; + } + default: { + result.tag = 
Tag.invalid; + result.loc.end = index; + index += 1; + return result; + } + } + break; + case State.saw_at_sign: + switch (c) { + case '"': { + result.tag = Tag.identifier; + state = State.string_literal; break; + } + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': { + state = State.builtin; + result.tag = Tag.builtin; + break; + } + default: { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + } + break; + case State.ampersand: + switch (c) { + case '=': { + result.tag = Tag.ampersand_equal; + index += 1; result.loc.end = index; + return result; + } + default: { + result.tag = Tag.ampersand; result.loc.end = index; + return result; + } + } + break; + case State.asterisk: switch (c) { + case '=': { + result.tag = Tag.asterisk_equal; + index += 1; result.loc.end = index; + return result; + } + case '*': { + result.tag = Tag.asterisk_asterisk; + index += 1; result.loc.end = index; + return result; + } + case '%': { + state = State.asterisk_percent; break; + } + case '|': { + state = State.asterisk_pipe; break; + } + default: { + result.tag = Tag.asterisk; + result.loc.end = index; + return result; + } + } + break; + case State.asterisk_percent: + switch (c) { + case '=': { + result.tag = Tag.asterisk_percent_equal; + index += 1; result.loc.end = index; + return result; + } + default: { + result.tag = Tag.asterisk_percent; + result.loc.end = index; + return result; + 
} + } + break; + case State.asterisk_pipe: + switch (c) { + case '=': { + result.tag = Tag.asterisk_pipe_equal; + index += 1; result.loc.end = index; + return result; + } + default: { + result.tag = Tag.asterisk_pipe; result.loc.end = index; + return result; + } + } + break; + case State.percent: + switch (c) { + case '=': { + result.tag = Tag.percent_equal; + index += 1; result.loc.end = index; + return result; + } + default: { + result.tag = Tag.percent; result.loc.end = index; + return result; + } + } + break; + case State.plus: + switch (c) { + case '=': { + result.tag = Tag.plus_equal; + index += 1; result.loc.end = index; + return result; + } + case '+': { + result.tag = Tag.plus_plus; + index += 1; result.loc.end = index; + return result; + } + case '%': { + state = State.plus_percent; break; + } + case '|': { + state = State.plus_pipe; break; + } + default: { + result.tag = Tag.plus; result.loc.end = index; + return result; + } + } + break; + case State.plus_percent: + switch (c) { + case '=': { + result.tag = Tag.plus_percent_equal; + index += 1; result.loc.end = index; + return result; + } + default: { + result.tag = Tag.plus_percent; result.loc.end = index; + return result; + } + } + break; + case State.plus_pipe: + switch (c) { + case '=': { + result.tag = Tag.plus_pipe_equal; + index += 1; result.loc.end = index; + return result; + } + default: { + result.tag = Tag.plus_pipe; result.loc.end = index; + return result; + } + } + break; + case State.caret: + switch (c) { + case '=': { + result.tag = Tag.caret_equal; + index += 1; result.loc.end = index; + return result; + } + default: { + result.tag = Tag.caret; result.loc.end = index; + return result; + } + } + break; + case State.identifier: + switch (c) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': 
+ case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; + default: { + // if (Token.getKeyword(buffer[result.loc.start..index])) | tag | { + const z = raw_source.substring(result.loc.start, index); + if (z in keywords) { + result.tag = keywords[z]; + } + result.loc.end = index; + return result; + } + + + } + break; + case State.builtin: switch (c) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; + default: result.loc.end = index; + return result; + } + break; + case State.backslash: + switch (c) { + case '\\': { + state = State.multiline_string_literal_line; + break; + } + default: { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + } + break; + case State.string_literal: + switch (c) { + case '\\': { + state = State.string_literal_backslash; break; + } + case '"': { + index += 1; + 
result.loc.end = index; + + return result; + } + case 0: { + //TODO: PORT + // if (index == buffer.len) { + // result.tag = .invalid; + // break; + // } else { + // checkLiteralCharacter(); + // } + result.loc.end = index; + return result; + } + case '\n': { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + //TODO: PORT + //default: checkLiteralCharacter(), + } + break; + case State.string_literal_backslash: + switch (c) { + case 0: + case '\n': { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + default: { + state = State.string_literal; break; + } + } + break; + case State.char_literal: switch (c) { + case 0: { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + case '\\': { + state = State.char_literal_backslash; + break; + } + //TODO: PORT + // '\'', 0x80...0xbf, 0xf8...0xff => { + // result.tag = .invalid; + // break; + // }, + // 0xc0...0xdf => { // 110xxxxx + // remaining_code_units = 1; + // state = .char_literal_unicode; + // }, + // 0xe0...0xef => { // 1110xxxx + // remaining_code_units = 2; + // state = .char_literal_unicode; + // }, + // 0xf0...0xf7 => { // 11110xxx + // remaining_code_units = 3; + // state = .char_literal_unicode; + // }, + + // case 0x80: + // case 0x81: + // case 0x82: + // case 0x83: + // case 0x84: + // case 0x85: + // case 0x86: + // case 0x87: + // case 0x88: + // case 0x89: + // case 0x8a: + // case 0x8b: + // case 0x8c: + // case 0x8d: + // case 0x8e: + // case 0x8f: + // case 0x90: + // case 0x91: + // case 0x92: + // case 0x93: + // case 0x94: + // case 0x95: + // case 0x96: + // case 0x97: + // case 0x98: + // case 0x99: + // case 0x9a: + // case 0x9b: + // case 0x9c: + // case 0x9d: + // case 0x9e: + // case 0x9f: + // case 0xa0: + // case 0xa1: + // case 0xa2: + // case 0xa3: + // case 0xa4: + // case 0xa5: + // case 0xa6: + // case 0xa7: + // case 0xa8: + // case 0xa9: + // case 0xaa: + // case 0xab: + // case 0xac: + // case 0xad: + // case 
0xae: + // case 0xaf: + // case 0xb0: + // case 0xb1: + // case 0xb2: + // case 0xb3: + // case 0xb4: + // case 0xb5: + // case 0xb6: + // case 0xb7: + // case 0xb8: + // case 0xb9: + // case 0xba: + // case 0xbb: + // case 0xbc: + // case 0xbd: + // case 0xbe: + // case 0xbf: + // case 0xf8: + // case 0xf9: + // case 0xfa: + // case 0xfb: + // case 0xfc: + // case 0xfd: + // case 0xfe: + // case 0xff: + // result.tag = .invalid; + // break; + // case 0xc0: + // case 0xc1: + // case 0xc2: + // case 0xc3: + // case 0xc4: + // case 0xc5: + // case 0xc6: + // case 0xc7: + // case 0xc8: + // case 0xc9: + // case 0xca: + // case 0xcb: + // case 0xcc: + // case 0xcd: + // case 0xce: + // case 0xcf: + // case 0xd0: + // case 0xd1: + // case 0xd2: + // case 0xd3: + // case 0xd4: + // case 0xd5: + // case 0xd6: + // case 0xd7: + // case 0xd8: + // case 0xd9: + // case 0xda: + // case 0xdb: + // case 0xdc: + // case 0xdd: + // case 0xde: + // case 0xdf: + // remaining_code_units = 1; + // state = .char_literal_unicode; + // case 0xe0: + // case 0xe1: + // case 0xe2: + // case 0xe3: + // case 0xe4: + // case 0xe5: + // case 0xe6: + // case 0xe7: + // case 0xe8: + // case 0xe9: + // case 0xea: + // case 0xeb: + // case 0xec: + // case 0xed: + // case 0xee: + // case 0xef: + // remaining_code_units = 2; + // state = .char_literal_unicode; + // case 0xf0: + // case 0xf1: + // case 0xf2: + // case 0xf3: + // case 0xf4: + // case 0xf5: + // case 0xf6: + // case 0xf7: + // remaining_code_units = 3; + // state = .char_literal_unicode; + + case '\n': { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + default: { + state = State.char_literal_end; break; + } + } + break; + case State.char_literal_backslash: + switch (c) { + case 0: + case '\n': { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + case 'x': { + state = State.char_literal_hex_escape; + seen_escape_digits = 0; break; + } + case 'u': { + state = 
State.char_literal_unicode_escape_saw_u; break; + } + default: { + state = State.char_literal_end; break; + } + } + break; + case State.char_literal_hex_escape: + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': { + seen_escape_digits += 1; + if (seen_escape_digits == 2) { + state = State.char_literal_end; + } break; + } + default: { + result.tag = Tag.invalid; + esult.loc.end = index; + return result; + } + } + break; + case State.char_literal_unicode_escape_saw_u: + switch (c) { + case 0: { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + case '{': { + state = State.char_literal_unicode_escape; break; + } + default: { + result.tag = Tag.invalid; + state = State.char_literal_unicode_invalid; break; + } + } + break; + case State.char_literal_unicode_escape: + switch (c) { + case 0: { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': break; + case '}': { + state = State.char_literal_end; // too many/few digits handled later + break; + } + default: { + result.tag = Tag.invalid; + state = State.char_literal_unicode_invalid; break; + } + } + break; + case State.char_literal_unicode_invalid: + switch (c) { + // Keep consuming characters until an obvious stopping point. + // This consolidates e.g. 
`u{0ab1Q}` into a single invalid token + // instead of creating the tokens `u{0ab1`, `Q`, `}` + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '}': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; + default: break; + } + break; + case State.char_literal_end: + switch (c) { + case '\'': { + result.tag = Tag.char_literal; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + } + break; + case State.char_literal_unicode: + switch (c) { + // 0x80...0xbf => { + // remaining_code_units -= 1; + // if (remaining_code_units == 0) { + // state = .char_literal_end; + // } + // }, + default: { + result.tag = Tag.invalid; + result.loc.end = index; + return result; + } + } + break; + case State.multiline_string_literal_line: + switch (c) { + case 0: + result.loc.end = index; + return result; + case '\n': { + + index += 1; + result.loc.end = index; + return result; + } + case '\t': break; + //TODO: PORT + //default: checkLiteralCharacter(), + + } + break; + case State.bang: + switch (c) { + case '=': { + result.tag = Tag.bang_equal; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.bang; + result.loc.end = index; + return result; + } + } + break; + case State.pipe: + switch (c) { + case '=': { + 
result.tag = Tag.pipe_equal; + index += 1; + result.loc.end = index; + return result; + } + case '|': { + result.tag = Tag.pipe_pipe; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.pipe; + result.loc.end = index; + return result; + } + } + break; + case State.equal: switch (c) { + case '=': { + result.tag = Tag.equal_equal; + index += 1; + result.loc.end = index; + return result; + } + case '>': { + result.tag = Tag.equal_angle_bracket_right; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.equal; + result.loc.end = index; + return result; + } + } + break; + case State.minus: switch (c) { + case '>': { + result.tag = Tag.arrow; + index += 1; + result.loc.end = index; + return result; + } + case '=': { + result.tag = Tag.minus_equal; + index += 1; + result.loc.end = index; + return result; + } + case '%': { + state = State.minus_percent; break; + } + case '|': { + state = State.minus_pipe; break; + } + default: { + result.tag = Tag.minus; + result.loc.end = index; + return result; + } + } + break; + case State.minus_percent: + switch (c) { + case '=': { + result.tag = Tag.minus_percent_equal; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.minus_percent; + result.loc.end = index; + return result; + } + } + break; + case State.minus_pipe: + switch (c) { + case '=': { + result.tag = Tag.minus_pipe_equal; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.minus_pipe; + result.loc.end = index; + return result; + } + } + break; + case State.angle_bracket_left: + switch (c) { + case '<': { + state = State.angle_bracket_angle_bracket_left; break; + } + case '=': { + result.tag = Tag.angle_bracket_left_equal; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.angle_bracket_left; + result.loc.end = index; + return result; + } + } + break; + case 
State.angle_bracket_angle_bracket_left: + switch (c) { + case '=': { + result.tag = Tag.angle_bracket_angle_bracket_left_equal; + index += 1; + result.loc.end = index; + return result; + } + case '|': { + state = State.angle_bracket_angle_bracket_left_pipe; + } + default: { + result.tag = Tag.angle_bracket_angle_bracket_left; + result.loc.end = index; + return result; + } + } + break; + case State.angle_bracket_angle_bracket_left_pipe: + switch (c) { + case '=': { + result.tag = Tag.angle_bracket_angle_bracket_left_pipe_equal; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.angle_bracket_angle_bracket_left_pipe; + result.loc.end = index; + return result; + } + } + break; + case State.angle_bracket_right: + switch (c) { + case '>': { + state = State.angle_bracket_angle_bracket_right; break; + } + case '=': { + result.tag = Tag.angle_bracket_right_equal; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.angle_bracket_right; + result.loc.end = index; + return result; + } + } + break; + case State.angle_bracket_angle_bracket_right: + switch (c) { + case '=': { + result.tag = Tag.angle_bracket_angle_bracket_right_equal; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.angle_bracket_angle_bracket_right; + result.loc.end = index; + return result; + } + } + break; + case State.period: + switch (c) { + case '.': { + state = State.period_2; break; + } + case '*': { + state = State.period_asterisk; break; + } + default: { + result.tag = Tag.period; + result.loc.end = index; + return result; + } + } + break; + case State.period_2: + switch (c) { + case '.': { + result.tag = Tag.ellipsis3; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.ellipsis2; + result.loc.end = index; + return result; + } + } + break; + case State.period_asterisk: + switch (c) { + case '*': { + result.tag = 
Tag.invalid_periodasterisks; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.period_asterisk; + result.loc.end = index; + return result; + } + } + break; + case State.slash: + switch (c) { + case '/': { + state = State.line_comment_start; + break; + } + case '=': { + result.tag = Tag.slash_equal; + index += 1; + result.loc.end = index; + return result; + } + default: { + result.tag = Tag.slash; + result.loc.end = index; + return result; + } + } break; + case State.line_comment_start: + switch (c) { + case 0: { + if (index != raw_source.length) { + result.tag = Tag.invalid; + index += 1; + } + result.loc.end = index; + return result; + } + case '/': { + state = State.doc_comment_start; break; + } + case '!': { + result.tag = Tag.container_doc_comment; + state = State.doc_comment; break; + } + case '\n': { + state = State.start; + result.loc.start = index + 1; break; + } + case '\t': + state = State.line_comment; break; + default: { + state = State.line_comment; + //TODO: PORT + //checkLiteralCharacter(); + break; + } + } break; + case State.doc_comment_start: + switch (c) { + case '/': { + state = State.line_comment; break; + } + case 0: + case '\n': + { + result.tag = Tag.doc_comment; + result.loc.end = index; + return result; + } + case '\t': { + state = State.doc_comment; + result.tag = Tag.doc_comment; break; + } + default: { + state = State.doc_comment; + result.tag = Tag.doc_comment; + //TODO: PORT + //checkLiteralCharacter(); + break; + } + } break; + case State.line_comment: + switch (c) { + case 0: { + if (index != raw_source.length) { + result.tag = Tag.invalid; + index += 1; + } + result.loc.end = index; + return result; + } + case '\n': { + state = State.start; + result.loc.start = index + 1; + break; + } + case '\t': break; + //TODO: PORT + //default: checkLiteralCharacter(), + } break; + case State.doc_comment: + switch (c) { + case 0:// + case '\n': + result.loc.end = index; + return result; + case '\t': break; + //TODOL 
PORT + // default: checkLiteralCharacter(), + default: + break; + } break; + case State.int: + switch (c) { + case '.': + state = State.int_period; + break; + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + break; + case 'e': + case 'E': + case 'p': + case 'P': + state = State.int_exponent; + break; + default: result.loc.end = index; + return result; + } break; + case State.int_exponent: + switch (c) { + case '-': + case '+': + { + `` + state = State.float; break; + } + default: { + index -= 1; + state = State.int; break; + } + } break; + case State.int_period: switch (c) { + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + state = State.float; break; + } + case 'e': + 
/**
 * Renders Zig source text as syntax-highlighted HTML.
 *
 * The source is tokenized with `tokenize_zig_source`. Every token up to (but
 * not including) the first `eof` token is wrapped in
 * `<span class="zig_TAG">...</span>`, where TAG is the token's tag. Text that
 * is not covered by any token is emitted between the spans unwrapped, and the
 * whole result is enclosed in a single `<pre>` element.
 *
 * All text taken from `src` is HTML-escaped (`&`, `<`, `>`), so source code
 * containing those characters cannot break or inject markup.
 *
 * @param {string} src - Zig source code to highlight.
 * @returns {string} HTML markup: a `<pre>` element with one `<span>` per token.
 */
function generate_html_for_src(src) {
    const htmlEscapes = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
    const escapeHtml = (text) => text.replace(/[&<>]/g, (ch) => htmlEscapes[ch]);

    // Collect the output as pieces and join once at the end; re-slicing the
    // whole string for every token would be quadratic in the source length.
    const html = ["<pre>"];

    // Index into `src` of the first character that has not been emitted yet.
    let cursor = 0;

    for (const tok of tokenize_zig_source(src)) {
        if (tok.tag === Tag.eof) {
            break;
        }

        // Defensive clamping: never move backwards, so a malformed token
        // range cannot duplicate text that was already emitted.
        const start = Math.max(tok.loc.start, cursor);
        const end = Math.max(tok.loc.end, start);

        // Text between the previous token and this one stays unwrapped.
        html.push(escapeHtml(src.slice(cursor, start)));
        html.push(`<span class="zig_${tok.tag}">`, escapeHtml(src.slice(start, end)), "</span>");

        cursor = end;
    }

    // Whatever follows the last token (e.g. trailing whitespace).
    html.push(escapeHtml(src.slice(cursor)), "</pre>");

    return html.join("");
}
// data.substring(b_json[i + 1].loc.start, b_json[i + 1].loc.end), +// // data.substring(b_json[i + 2].loc.start, b_json[i + 2].loc.end), +// // ); + +// console.log("TAG: a != b"); +// console.log("js", a.tag); +// console.log("zig", b.tag); +// passed = false; +// return; +// } + +// if (a.tag !== Tag.eof && a.loc.start !== b.loc.start) { +// console.log("START: a != b"); + +// console.log("js", "\"" + data.substring(a_json[i ].loc.start, a_json[i].loc.end) + "\""); +// console.log("zig", "\"" + data.substring(b_json[i ].loc.start, b_json[i].loc.end) + "\""); + + +// passed = false; +// return; +// } + +// // if (a.tag !== Tag.eof && a.loc.end !== b.loc.end) { +// // console.log("END: a != b"); +// // // console.log("Around here:"); +// // // console.log( +// // // // data.substring(b_json[i - 2].loc.start, b_json[i - 2].loc.end), +// // // // data.substring(b_json[i - 1].loc.start, b_json[i - 1].loc.end), +// // // data.substring(b_json[i ].loc.start, b_json[i].loc.end), +// // // // data.substring(b_json[i + 1].loc.start, b_json[i + 1].loc.end), +// // // // data.substring(b_json[i + 2].loc.start, b_json[i + 2].loc.end), +// // // ); +// // console.log("js", "\"" + data.substring(a_json[i ].loc.start, a_json[i].loc.end) + "\""); +// // console.log("zig", "\"" + data.substring(b_json[i ].loc.start, b_json[i].loc.end) + "\""); +// // passed = false; +// // return; +// // } +// } +// return passed; +// } +// var path = require('path'); +// function fromDir(startPath, filter) { +// if (!fs.existsSync(startPath)) { +// console.log("no dir ", startPath); +// return; +// } +// var files = fs.readdirSync(startPath); +// for (var i = 0; i < files.length; i++) { +// var filename = path.join(startPath, files[i]); +// var stat = fs.lstatSync(filename); +// if (stat.isDirectory()) { +// fromDir(filename, filter); //recurse +// } else if (filename.endsWith(filter)) { +// try { +// console.log('-- TESTING: ', filename); +// console.log("\t\t", testFile(filename)); +// } +// 
catch { +// } +// }; +// }; +// }; +// fromDir('../std', '.zig'); +// //console.log(testFile("/home/myvar/code/zig/lib/std/fmt/errol.zig")); +// //console.log(testFile("test.zig")); +// }
\ No newline at end of file |
