diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2023-11-05 03:39:01 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-11-05 03:39:01 -0500 |
| commit | dc63426b1eaef9c65152c8e052e5552e593e9382 (patch) | |
| tree | 4baa141bdd1da68a3f15164988317350f95b76c7 /src | |
| parent | f24ceec35a6fd1e5e6a671461b78919b5f588a32 (diff) | |
| parent | a9002156a09130038abcf418609fea725ce71bc2 (diff) | |
| download | zig-dc63426b1eaef9c65152c8e052e5552e593e9382.tar.gz zig-dc63426b1eaef9c65152c8e052e5552e593e9382.zip | |
Merge pull request #17866 from ziglang/reduce-inline-import
zig reduce: support inlining `@import` and more
Diffstat (limited to 'src')
| -rw-r--r-- | src/reduce.zig | 164 | ||||
| -rw-r--r-- | src/reduce/Walk.zig | 201 |
2 files changed, 306 insertions, 59 deletions
diff --git a/src/reduce.zig b/src/reduce.zig index 878f99a0fa..f11b2a6ae1 100644 --- a/src/reduce.zig +++ b/src/reduce.zig @@ -5,6 +5,8 @@ const assert = std.debug.assert; const fatal = @import("./main.zig").fatal; const Ast = std.zig.Ast; const Walk = @import("reduce/Walk.zig"); +const AstGen = @import("AstGen.zig"); +const Zir = @import("Zir.zig"); const usage = \\zig reduce [options] ./checker root_source_file.zig [-- [argv]] @@ -39,8 +41,6 @@ const Interestingness = enum { interesting, unknown, boring }; // - add support for parsing the module flags // - more fancy transformations // - @import inlining of modules -// - @import inlining of files -// - deleting unused functions and other globals // - removing statements or blocks of code // - replacing operands of `and` and `or` with `true` and `false` // - replacing if conditions with `true` and `false` @@ -109,8 +109,14 @@ pub fn main(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { var rendered = std.ArrayList(u8).init(gpa); defer rendered.deinit(); - var tree = try parse(gpa, arena, root_source_file_path); - defer tree.deinit(gpa); + var astgen_input = std.ArrayList(u8).init(gpa); + defer astgen_input.deinit(); + + var tree = try parse(gpa, root_source_file_path); + defer { + gpa.free(tree.source); + tree.deinit(gpa); + } if (!skip_smoke_test) { std.debug.print("smoke testing the interestingness check...\n", .{}); @@ -126,6 +132,10 @@ pub fn main(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { var fixups: Ast.Fixups = .{}; defer fixups.deinit(gpa); + + var more_fixups: Ast.Fixups = .{}; + defer more_fixups.deinit(gpa); + var rng = std.rand.DefaultPrng.init(seed); // 1. Walk the AST of the source file looking for independent @@ -145,7 +155,7 @@ pub fn main(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { var transformations = std.ArrayList(Walk.Transformation).init(gpa); defer transformations.deinit(); - try Walk.findTransformations(&tree, &transformations); + try Walk.findTransformations(arena, &tree, &transformations); sortTransformations(transformations.items, rng.random()); fresh: while (transformations.items.len > 0) { @@ -156,29 +166,80 @@ pub fn main(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { var start_index: usize = 0; while (start_index < transformations.items.len) { - subset_size = @max(1, subset_size / 2); + const prev_subset_size = subset_size; + subset_size = @max(1, subset_size * 3 / 4); + if (prev_subset_size > 1 and subset_size == 1) + start_index = 0; const this_set = transformations.items[start_index..][0..subset_size]; - try transformationsToFixups(gpa, this_set, &fixups); + std.debug.print("trying {d} random transformations: ", .{subset_size}); + for (this_set[0..@min(this_set.len, 20)]) |t| { + std.debug.print("{s} ", .{@tagName(t)}); + } + std.debug.print("\n", .{}); + try transformationsToFixups(gpa, arena, root_source_file_path, this_set, &fixups); rendered.clearRetainingCapacity(); try tree.renderToArrayList(&rendered, fixups); + + // The transformations we applied may have resulted in unused locals, + // in which case we would like to add the respective discards. + { + try astgen_input.resize(rendered.items.len); + @memcpy(astgen_input.items, rendered.items); + try astgen_input.append(0); + const source_with_null = astgen_input.items[0 .. astgen_input.items.len - 1 :0]; + var astgen_tree = try Ast.parse(gpa, source_with_null, .zig); + defer astgen_tree.deinit(gpa); + if (astgen_tree.errors.len != 0) { + @panic("syntax errors occurred"); + } + var zir = try AstGen.generate(gpa, astgen_tree); + defer zir.deinit(gpa); + + if (zir.hasCompileErrors()) { + more_fixups.clearRetainingCapacity(); + const payload_index = zir.extra[@intFromEnum(Zir.ExtraIndex.compile_errors)]; + assert(payload_index != 0); + const header = zir.extraData(Zir.Inst.CompileErrors, payload_index); + var extra_index = header.end; + for (0..header.data.items_len) |_| { + const item = zir.extraData(Zir.Inst.CompileErrors.Item, extra_index); + extra_index = item.end; + const msg = zir.nullTerminatedString(item.data.msg); + if (mem.eql(u8, msg, "unused local constant") or + mem.eql(u8, msg, "unused local variable") or + mem.eql(u8, msg, "unused function parameter") or + mem.eql(u8, msg, "unused capture")) + { + const ident_token = item.data.token; + try more_fixups.unused_var_decls.put(gpa, ident_token, {}); + } else { + std.debug.print("found other ZIR error: '{s}'\n", .{msg}); + } + } + if (more_fixups.count() != 0) { + rendered.clearRetainingCapacity(); + try astgen_tree.renderToArrayList(&rendered, more_fixups); + } + } + } + try std.fs.cwd().writeFile(root_source_file_path, rendered.items); + //std.debug.print("trying this code:\n{s}\n", .{rendered.items}); const interestingness = try runCheck(arena, interestingness_argv.items); - std.debug.print("{d} random transformations: {s}. {d} remaining\n", .{ - subset_size, @tagName(interestingness), transformations.items.len - start_index, + std.debug.print("{d} random transformations: {s}. {d}/{d}\n", .{ + subset_size, @tagName(interestingness), start_index, transformations.items.len, }); switch (interestingness) { .interesting => { - const new_tree = try parse(gpa, arena, root_source_file_path); + const new_tree = try parse(gpa, root_source_file_path); + gpa.free(tree.source); tree.deinit(gpa); tree = new_tree; - try Walk.findTransformations(&tree, &transformations); - // Resetting based on the seed again means we will get the same - // results if restarting the reduction process from this new point. - rng = std.rand.DefaultPrng.init(seed); + try Walk.findTransformations(arena, &tree, &transformations); sortTransformations(transformations.items, rng.random()); continue :fresh; @@ -188,6 +249,11 @@ pub fn main(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { // If we tested only one transformation, move on to the next one. if (subset_size == 1) { start_index += 1; + } else { + start_index += subset_size; + if (start_index + subset_size > transformations.items.len) { + start_index = 0; + } } }, } @@ -241,6 +307,8 @@ fn runCheck(arena: std.mem.Allocator, argv: []const []const u8) !Interestingness fn transformationsToFixups( gpa: Allocator, + arena: Allocator, + root_source_file_path: []const u8, transforms: []const Walk.Transformation, fixups: *Ast.Fixups, ) !void { @@ -253,21 +321,77 @@ fn transformationsToFixups( .delete_node => |decl_node| { try fixups.omit_nodes.put(gpa, decl_node, {}); }, + .delete_var_decl => |delete_var_decl| { + try fixups.omit_nodes.put(gpa, delete_var_decl.var_decl_node, {}); + for (delete_var_decl.references.items) |ident_node| { + try fixups.replace_nodes.put(gpa, ident_node, "undefined"); + } + }, .replace_with_undef => |node| { - try fixups.replace_nodes.put(gpa, node, {}); + try fixups.replace_nodes.put(gpa, node, "undefined"); + }, + .inline_imported_file => |inline_imported_file| { + const full_imported_path = try std.fs.path.join(gpa, &.{ + std.fs.path.dirname(root_source_file_path) orelse ".", + inline_imported_file.imported_string, + }); + defer gpa.free(full_imported_path); + var other_file_ast = try parse(gpa, full_imported_path); + defer { + gpa.free(other_file_ast.source); + other_file_ast.deinit(gpa); + } + + var inlined_fixups: Ast.Fixups = .{}; + defer inlined_fixups.deinit(gpa); + if (std.fs.path.dirname(inline_imported_file.imported_string)) |dirname| { + inlined_fixups.rebase_imported_paths = dirname; + } + for (inline_imported_file.in_scope_names.keys()) |name| { + // This name needs to be mangled in order to not cause an + // ambiguous reference error. + var i: u32 = 2; + const mangled = while (true) : (i += 1) { + const mangled = try std.fmt.allocPrint(gpa, "{s}{d}", .{ name, i }); + if (!inline_imported_file.in_scope_names.contains(mangled)) + break mangled; + gpa.free(mangled); + }; + try inlined_fixups.rename_identifiers.put(gpa, name, mangled); + } + defer { + for (inlined_fixups.rename_identifiers.values()) |v| { + gpa.free(v); + } + } + + var other_source = std.ArrayList(u8).init(gpa); + defer other_source.deinit(); + try other_source.appendSlice("struct {\n"); + try other_file_ast.renderToArrayList(&other_source, inlined_fixups); + try other_source.appendSlice("}"); + + try fixups.replace_nodes.put( + gpa, + inline_imported_file.builtin_call_node, + try arena.dupe(u8, other_source.items), + ); }, }; } -fn parse(gpa: Allocator, arena: Allocator, root_source_file_path: []const u8) !Ast { - const source_code = try std.fs.cwd().readFileAllocOptions( - arena, - root_source_file_path, +fn parse(gpa: Allocator, file_path: []const u8) !Ast { + const source_code = std.fs.cwd().readFileAllocOptions( + gpa, + file_path, std.math.maxInt(u32), null, 1, 0, - ); + ) catch |err| { + fatal("unable to open '{s}': {s}", .{ file_path, @errorName(err) }); + }; + errdefer gpa.free(source_code); var tree = try Ast.parse(gpa, source_code, .zig); errdefer tree.deinit(gpa); diff --git a/src/reduce/Walk.zig b/src/reduce/Walk.zig index 5ef341fcbf..94ef0eeb26 100644 --- a/src/reduce/Walk.zig +++ b/src/reduce/Walk.zig @@ -2,11 +2,15 @@ const std = @import("std"); const Ast = std.zig.Ast; const Walk = @This(); const assert = std.debug.assert; +const BuiltinFn = @import("../BuiltinFn.zig"); ast: *const Ast, transformations: *std.ArrayList(Transformation), unreferenced_globals: std.StringArrayHashMapUnmanaged(Ast.Node.Index), +in_scope_names: std.StringArrayHashMapUnmanaged(u32), +replace_names: std.StringArrayHashMapUnmanaged(u32), gpa: std.mem.Allocator, +arena: std.mem.Allocator, pub const Transformation = union(enum) { /// Replace the fn decl AST Node with one whose body is only `@trap()` with @@ -14,23 +18,51 @@ pub const Transformation = union(enum) { gut_function: Ast.Node.Index, /// Omit a global declaration. delete_node: Ast.Node.Index, + /// Delete a local variable declaration and replace all of its references + /// with `undefined`. + delete_var_decl: struct { + var_decl_node: Ast.Node.Index, + /// Identifier nodes that reference the variable. + references: std.ArrayListUnmanaged(Ast.Node.Index), + }, /// Replace an expression with `undefined`. replace_with_undef: Ast.Node.Index, + /// Replace an `@import` with the imported file contents wrapped in a struct. + inline_imported_file: InlineImportedFile, + + pub const InlineImportedFile = struct { + builtin_call_node: Ast.Node.Index, + imported_string: []const u8, + /// Identifier names that must be renamed in the inlined code or else + /// will cause ambiguous reference errors. + in_scope_names: std.StringArrayHashMapUnmanaged(void), + }; }; pub const Error = error{OutOfMemory}; /// The result will be priority shuffled. -pub fn findTransformations(ast: *const Ast, transformations: *std.ArrayList(Transformation)) !void { +pub fn findTransformations( + arena: std.mem.Allocator, + ast: *const Ast, + transformations: *std.ArrayList(Transformation), +) !void { transformations.clearRetainingCapacity(); var walk: Walk = .{ .ast = ast, .transformations = transformations, .gpa = transformations.allocator, + .arena = arena, .unreferenced_globals = .{}, + .in_scope_names = .{}, + .replace_names = .{}, }; - defer walk.unreferenced_globals.deinit(walk.gpa); + defer { + walk.unreferenced_globals.deinit(walk.gpa); + walk.in_scope_names.deinit(walk.gpa); + walk.replace_names.deinit(walk.gpa); + } try walkMembers(&walk, walk.ast.rootDecls()); @@ -43,14 +75,18 @@ pub fn findTransformations(ast: *const Ast, transformations: *std.ArrayList(Tran fn walkMembers(w: *Walk, members: []const Ast.Node.Index) Error!void { // First we scan for globals so that we can delete them while walking. - try scanDecls(w, members); + try scanDecls(w, members, .add); for (members) |member| { try walkMember(w, member); } + + try scanDecls(w, members, .remove); } -fn scanDecls(w: *Walk, members: []const Ast.Node.Index) Error!void { +const ScanDeclsAction = enum { add, remove }; + +fn scanDecls(w: *Walk, members: []const Ast.Node.Index, action: ScanDeclsAction) Error!void { const ast = w.ast; const gpa = w.gpa; const node_tags = ast.nodes.items(.tag); @@ -74,9 +110,27 @@ fn scanDecls(w: *Walk, members: []const Ast.Node.Index) Error!void { else => continue, }; + assert(token_tags[name_token] == .identifier); const name_bytes = ast.tokenSlice(name_token); - try w.unreferenced_globals.put(gpa, name_bytes, member_node); + + switch (action) { + .add => { + try w.unreferenced_globals.put(gpa, name_bytes, member_node); + + const gop = try w.in_scope_names.getOrPut(gpa, name_bytes); + if (!gop.found_existing) gop.value_ptr.* = 0; + gop.value_ptr.* += 1; + }, + .remove => { + const entry = w.in_scope_names.getEntry(name_bytes).?; + if (entry.value_ptr.* <= 1) { + assert(w.in_scope_names.swapRemove(name_bytes)); + } else { + entry.value_ptr.* -= 1; + } + }, + } } } @@ -89,9 +143,10 @@ fn walkMember(w: *Walk, decl: Ast.Node.Index) Error!void { try walkExpression(w, fn_proto); const body_node = datas[decl].rhs; if (!isFnBodyGutted(ast, body_node)) { + w.replace_names.clearRetainingCapacity(); try w.transformations.append(.{ .gut_function = decl }); + try walkExpression(w, body_node); } - try walkExpression(w, body_node); }, .fn_proto_simple, .fn_proto_multi, @@ -121,7 +176,10 @@ fn walkMember(w: *Walk, decl: Ast.Node.Index) Error!void { .container_field_init, .container_field_align, .container_field, - => try walkContainerField(w, ast.fullContainerField(decl).?), + => { + try w.transformations.append(.{ .delete_node = decl }); + try walkContainerField(w, ast.fullContainerField(decl).?); + }, .@"comptime" => { try w.transformations.append(.{ .delete_node = decl }); @@ -140,7 +198,15 @@ fn walkExpression(w: *Walk, node: Ast.Node.Index) Error!void { const node_tags = ast.nodes.items(.tag); const datas = ast.nodes.items(.data); switch (node_tags[node]) { - .identifier => try walkIdentifier(w, main_tokens[node]), + .identifier => { + const name_ident = main_tokens[node]; + assert(token_tags[name_ident] == .identifier); + const name_bytes = ast.tokenSlice(name_ident); + _ = w.unreferenced_globals.swapRemove(name_bytes); + if (w.replace_names.get(name_bytes)) |index| { + try w.transformations.items[index].delete_var_decl.references.append(w.arena, node); + } + }, .number_literal, .char_literal, @@ -437,16 +503,16 @@ fn walkExpression(w: *Walk, node: Ast.Node.Index) Error!void { .builtin_call_two, .builtin_call_two_comma => { if (datas[node].lhs == 0) { - return walkBuiltinCall(w, main_tokens[node], &.{}); + return walkBuiltinCall(w, node, &.{}); } else if (datas[node].rhs == 0) { - return walkBuiltinCall(w, main_tokens[node], &.{datas[node].lhs}); + return walkBuiltinCall(w, node, &.{datas[node].lhs}); } else { - return walkBuiltinCall(w, main_tokens[node], &.{ datas[node].lhs, datas[node].rhs }); + return walkBuiltinCall(w, node, &.{ datas[node].lhs, datas[node].rhs }); } }, .builtin_call, .builtin_call_comma => { const params = ast.extra_data[datas[node].lhs..datas[node].rhs]; - return walkBuiltinCall(w, main_tokens[node], params); + return walkBuiltinCall(w, node, params); }, .fn_proto_simple, @@ -537,9 +603,12 @@ fn walkGlobalVarDecl(w: *Walk, decl_node: Ast.Node.Index, var_decl: Ast.full.Var try walkExpression(w, var_decl.ast.section_node); } - assert(var_decl.ast.init_node != 0); - - return walkExpression(w, var_decl.ast.init_node); + if (var_decl.ast.init_node != 0) { + if (!isUndefinedIdent(w.ast, var_decl.ast.init_node)) { + try w.transformations.append(.{ .replace_with_undef = var_decl.ast.init_node }); + } + try walkExpression(w, var_decl.ast.init_node); + } } fn walkLocalVarDecl(w: *Walk, var_decl: Ast.full.VarDecl) Error!void { @@ -561,12 +630,12 @@ fn walkLocalVarDecl(w: *Walk, var_decl: Ast.full.VarDecl) Error!void { try walkExpression(w, var_decl.ast.section_node); } - assert(var_decl.ast.init_node != 0); - if (!isUndefinedIdent(w.ast, var_decl.ast.init_node)) { - try w.transformations.append(.{ .replace_with_undef = var_decl.ast.init_node }); + if (var_decl.ast.init_node != 0) { + if (!isUndefinedIdent(w.ast, var_decl.ast.init_node)) { + try w.transformations.append(.{ .replace_with_undef = var_decl.ast.init_node }); + } + try walkExpression(w, var_decl.ast.init_node); } - - return walkExpression(w, var_decl.ast.init_node); } fn walkContainerField(w: *Walk, field: Ast.full.ContainerField) Error!void { @@ -576,7 +645,9 @@ fn walkContainerField(w: *Walk, field: Ast.full.ContainerField) Error!void { if (field.ast.align_expr != 0) { try walkExpression(w, field.ast.align_expr); // alignment } - try walkExpression(w, field.ast.value_expr); // value + if (field.ast.value_expr != 0) { + try walkExpression(w, field.ast.value_expr); // value + } } fn walkBlock( @@ -594,9 +665,34 @@ fn walkBlock( .local_var_decl, .simple_var_decl, .aligned_var_decl, - => try walkLocalVarDecl(w, ast.fullVarDecl(stmt).?), - - else => try walkExpression(w, stmt), + => { + const var_decl = ast.fullVarDecl(stmt).?; + if (var_decl.ast.init_node != 0 and + isUndefinedIdent(w.ast, var_decl.ast.init_node)) + { + try w.transformations.append(.{ .delete_var_decl = .{ + .var_decl_node = stmt, + .references = .{}, + } }); + const name_tok = var_decl.ast.mut_token + 1; + const name_bytes = ast.tokenSlice(name_tok); + try w.replace_names.put(w.gpa, name_bytes, @intCast(w.transformations.items.len - 1)); + } else { + try walkLocalVarDecl(w, var_decl); + } + }, + + else => { + switch (categorizeStmt(ast, stmt)) { + // Don't try to remove `_ = foo;` discards; those are handled separately. + .discard_identifier => {}, + // definitely try to remove `_ = undefined;` though. + .discard_undefined, .trap_call, .other => { + try w.transformations.append(.{ .delete_node = stmt }); + }, + } + try walkExpression(w, stmt); + }, } } } @@ -680,10 +776,35 @@ fn walkContainerDecl( fn walkBuiltinCall( w: *Walk, - builtin_token: Ast.TokenIndex, + call_node: Ast.Node.Index, params: []const Ast.Node.Index, ) Error!void { - _ = builtin_token; + const ast = w.ast; + const main_tokens = ast.nodes.items(.main_token); + const builtin_token = main_tokens[call_node]; + const builtin_name = ast.tokenSlice(builtin_token); + const info = BuiltinFn.list.get(builtin_name).?; + switch (info.tag) { + .import => { + const operand_node = params[0]; + const str_lit_token = main_tokens[operand_node]; + const token_bytes = ast.tokenSlice(str_lit_token); + if (std.mem.endsWith(u8, token_bytes, ".zig\"")) { + const imported_string = std.zig.string_literal.parseAlloc(w.arena, token_bytes) catch + unreachable; + try w.transformations.append(.{ .inline_imported_file = .{ + .builtin_call_node = call_node, + .imported_string = imported_string, + .in_scope_names = try std.StringArrayHashMapUnmanaged(void).init( + w.arena, + w.in_scope_names.keys(), + &.{}, + ), + } }); + } + }, + else => {}, + } for (params) |param_node| { try walkExpression(w, param_node); } @@ -821,6 +942,7 @@ fn isFnBodyGutted(ast: *const Ast, body_node: Ast.Node.Index) bool { } const StmtCategory = enum { + discard_undefined, discard_identifier, trap_call, other, @@ -846,8 +968,14 @@ fn categorizeStmt(ast: *const Ast, stmt: Ast.Node.Index) StmtCategory { }, .assign => { const infix = datas[stmt]; - if (isDiscardIdent(ast, infix.lhs) and node_tags[infix.rhs] == .identifier) - return .discard_identifier; + if (isDiscardIdent(ast, infix.lhs) and node_tags[infix.rhs] == .identifier) { + const name_bytes = ast.tokenSlice(main_tokens[infix.rhs]); + if (std.mem.eql(u8, name_bytes, "undefined")) { + return .discard_undefined; + } else { + return .discard_identifier; + } + } return .other; }, else => return .other, @@ -867,26 +995,21 @@ fn categorizeBuiltinCall( } fn isDiscardIdent(ast: *const Ast, node: Ast.Node.Index) bool { - const node_tags = ast.nodes.items(.tag); - const main_tokens = ast.nodes.items(.main_token); - switch (node_tags[node]) { - .identifier => { - const token_index = main_tokens[node]; - const name_bytes = ast.tokenSlice(token_index); - return std.mem.eql(u8, name_bytes, "_"); - }, - else => return false, - } + return isMatchingIdent(ast, node, "_"); } fn isUndefinedIdent(ast: *const Ast, node: Ast.Node.Index) bool { + return isMatchingIdent(ast, node, "undefined"); +} + +fn isMatchingIdent(ast: *const Ast, node: Ast.Node.Index, string: []const u8) bool { const node_tags = ast.nodes.items(.tag); const main_tokens = ast.nodes.items(.main_token); switch (node_tags[node]) { .identifier => { const token_index = main_tokens[node]; const name_bytes = ast.tokenSlice(token_index); - return std.mem.eql(u8, name_bytes, "undefined"); + return std.mem.eql(u8, name_bytes, string); }, else => return false, } |
