From 2a990d69669c2a2cd16134e8ebbd2750060f8071 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Thu, 27 May 2021 16:32:35 -0700
Subject: stage1: rework tokenizer to match stage2

 * Extracts AstGen logic from ir.cpp into astgen.cpp. Reduces the
   largest file of stage1 from 33,551 lines to 25,510.
 * tokenizer: rework it completely to match the stage2 tokenizer logic.
   They can now be maintained together; when one is changed, the other
   can be changed in the same way.
   - Each token now takes up 13 bytes instead of 64 bytes. The tokenizer
     does not parse char literals, string literals, integer literals,
     etc into meaningful data. Instead, that happens during parsing or
     astgen.
   - no longer store line offsets. Error messages scan source
     files to find the line/column as needed (same as stage2).
   - main loop: instead of checking the loop, handle a null byte
     explicitly in the switch statements. This is a nice improvement
     that we may want to backport to stage2.
   - delete some dead tokens, artifacts of past syntax that no longer
     exists.
 * Parser: fix a TODO by parsing builtin functions as tokens rather than
   `@` as a separate token. This is how stage2 does it.
 * Remove some debugging infrastructure. These will need to be redone,
   if at all, as the code migrates to match stage2.
   - remove the ast_render code.
   - remove the IR debugging stuff
   - remove the token printing code
---
 src/stage1/dump_analysis.cpp | 42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

(limited to 'src/stage1/dump_analysis.cpp')

diff --git a/src/stage1/dump_analysis.cpp b/src/stage1/dump_analysis.cpp
index 69a797bdaf..15cd7c2874 100644
--- a/src/stage1/dump_analysis.cpp
+++ b/src/stage1/dump_analysis.cpp
@@ -1084,19 +1084,43 @@ static void anal_dump_type(AnalDumpCtx *ctx, ZigType *ty) {
     jw_end_object(jw);
 }
 
+static Buf *collect_doc_comments(RootStruct *root_struct, TokenIndex first_token) {
+    if (first_token == 0) // index 0 yields nullptr rather than an empty Buf (presumably "no doc comments" - confirm)
+        return nullptr;
+    // Concatenate the text of every consecutive TokenIdDocComment token, starting at first_token, into a new Buf.
+    TokenId *token_ids = root_struct->token_ids;
+    TokenLoc *token_locs = root_struct->token_locs;
+    Buf *str = buf_alloc();
+    const char *source = buf_ptr(root_struct->source_code);
+    TokenIndex doc_token = first_token;
+    for (;token_ids[doc_token] == TokenIdDocComment; doc_token += 1) {
+        // Skips the first 3 bytes ('///'); the scan stops before '\n', so the newline is NOT copied.
+        uint32_t start_pos = token_locs[doc_token].offset;
+        uint32_t token_len = 0; // length is found by scanning the source up to '\n' or the NUL terminator
+        while (source[start_pos + token_len] != '\n' &&
+               source[start_pos + token_len] != 0)
+        {
+            token_len += 1;
+        }
+        buf_append_mem(str, source + start_pos + 3, token_len - 3);
+    }
+    return str;
+}
+
 static void anal_dump_node(AnalDumpCtx *ctx, const AstNode *node) {
     JsonWriter *jw = &ctx->jw;
 
     jw_begin_object(jw);
 
     jw_object_field(jw, "file");
-    anal_dump_file_ref(ctx, node->owner->data.structure.root_struct->path);
+    RootStruct *root_struct = node->owner->data.structure.root_struct;
+    anal_dump_file_ref(ctx, root_struct->path);
 
     jw_object_field(jw, "line");
-    jw_int(jw, node->line);
+    jw_int(jw, root_struct->token_locs[node->main_token].line);
 
     jw_object_field(jw, "col");
-    jw_int(jw, node->column);
+    jw_int(jw, root_struct->token_locs[node->main_token].column);
 
     const Buf *doc_comments_buf = nullptr;
     const Buf *name_buf = nullptr;
@@ -1107,30 +1131,30 @@ static void anal_dump_node(AnalDumpCtx *ctx, const AstNode *node) {
 
     switch (node->type) {
         case NodeTypeParamDecl:
-            doc_comments_buf = &node->data.param_decl.doc_comments;
+            doc_comments_buf = collect_doc_comments(root_struct, node->data.param_decl.doc_comments);
             name_buf = node->data.param_decl.name;
             is_var_args = node->data.param_decl.is_var_args;
             is_noalias = node->data.param_decl.is_noalias;
             is_comptime = node->data.param_decl.is_comptime;
             break;
         case NodeTypeFnProto:
-            doc_comments_buf = &node->data.fn_proto.doc_comments;
+            doc_comments_buf = collect_doc_comments(root_struct, node->data.fn_proto.doc_comments);
             field_nodes = &node->data.fn_proto.params;
             is_var_args = node->data.fn_proto.is_var_args;
             break;
         case NodeTypeVariableDeclaration:
-            doc_comments_buf = &node->data.variable_declaration.doc_comments;
+            doc_comments_buf = collect_doc_comments(root_struct, node->data.variable_declaration.doc_comments);
             break;
         case NodeTypeErrorSetField:
-            doc_comments_buf = &node->data.err_set_field.doc_comments;
+            doc_comments_buf = collect_doc_comments(root_struct, node->data.err_set_field.doc_comments);
             break;
         case NodeTypeStructField:
-            doc_comments_buf = &node->data.struct_field.doc_comments;
+            doc_comments_buf = collect_doc_comments(root_struct, node->data.struct_field.doc_comments);
             name_buf = node->data.struct_field.name;
             break;
         case NodeTypeContainerDecl:
             field_nodes = &node->data.container_decl.fields;
-            doc_comments_buf = &node->data.container_decl.doc_comments;
+            doc_comments_buf = collect_doc_comments(root_struct, node->data.container_decl.doc_comments);
             break;
         default:
             break;
-- 
cgit v1.2.3