From 9362f382ab7023592cc1d71044217b847b122406 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Sat, 2 Jan 2021 12:32:30 -0700
Subject: stage2: implement function call inlining in the frontend

* remove the -Ddump-zir thing. That's handled through --verbose-ir.
* rework Fn to have an is_inline flag without requiring any more memory
  on the heap per function.
* implement a rough first version of dumping typed ZIR (TZIR), which is
  a lot more helpful for debugging than what we had before. We don't
  have a way to parse it, though.
* keep track of whether the inline-ness of a function changes, because
  if it does we have to go update callsites.
* add a compile error for inline and export used together.

Inline function calls and comptime function calls are implemented the
same way. A block instruction is set up to capture the result, and then
a scope is set up that has an is_comptime flag and, if the call is being
inlined, some inlining state. When analyzing `ret` instructions, Zig
looks for inlining state in the scope and, if found, treats `ret` as a
`break` instruction instead, with the target block being the one set up
at the inline callsite.

Follow-up items:
* Complete the debug TZIR dumping code.
* Don't redundantly generate ZIR for each inline/comptime function
  call. Instead we should add a new state enum tag to Fn.
* Implement comptime and inlining branch quotas.
* Add more test cases.
---
 src/Module.zig | 150 +++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 114 insertions(+), 36 deletions(-)

diff --git a/src/Module.zig b/src/Module.zig
index 29c19c09a0..db76ecd5db 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -286,23 +286,40 @@ pub const Decl = struct {
 /// Extern functions do not have this data structure; they are represented by
 /// the `Decl` only, with a `Value` tag of `extern_fn`.
 pub const Fn = struct {
-    /// This memory owned by the Decl's TypedValue.Managed arena allocator.
-    analysis: union(enum) {
+    bits: packed struct {
+        /// Get and set this field via `analysis` and `setAnalysis`.
+        state: Analysis.Tag,
+        /// We carry this state into `Fn` instead of leaving it in the AST so that
+        /// analysis of function calls can happen even on functions whose AST has
+        /// been unloaded from memory.
+        is_inline: bool,
+        unused_bits: u4 = 0,
+    },
+    /// Get and set this data via `analysis` and `setAnalysis`.
+    data: union {
+        none: void,
+        zir: *ZIR,
+        body: Body,
+    },
+    owner_decl: *Decl,
+
+    pub const Analysis = union(Tag) {
         queued: *ZIR,
         in_progress,
-        /// There will be a corresponding ErrorMsg in Module.failed_decls
         sema_failure,
-        /// This Fn might be OK but it depends on another Decl which did not successfully complete
-        /// semantic analysis.
         dependency_failure,
         success: Body,
-    },
-    owner_decl: *Decl,
 
-    /// This memory is temporary and points to stack memory for the duration
-    /// of Fn analysis.
-    pub const Analysis = struct {
-        inner_block: Scope.Block,
+        pub const Tag = enum(u3) {
+            queued,
+            in_progress,
+            /// There will be a corresponding ErrorMsg in Module.failed_decls
+            sema_failure,
+            /// This Fn might be OK but it depends on another Decl which did not
+            /// successfully complete semantic analysis.
+            dependency_failure,
+            success,
+        };
     };
 
     /// Contains un-analyzed ZIR instructions generated from Zig source AST.
@@ -311,22 +328,37 @@ pub const Fn = struct {
         arena: std.heap.ArenaAllocator.State,
     };
 
-    /// For debugging purposes.
-    pub fn dump(self: *Fn, mod: Module) void {
-        std.debug.print("Module.Function(name={s}) ", .{self.owner_decl.name});
-        switch (self.analysis) {
-            .queued => {
-                std.debug.print("queued\n", .{});
+    pub fn analysis(self: Fn) Analysis {
+        return switch (self.bits.state) {
+            .queued => .{ .queued = self.data.zir },
+            .success => .{ .success = self.data.body },
+            .in_progress => .in_progress,
+            .sema_failure => .sema_failure,
+            .dependency_failure => .dependency_failure,
+        };
+    }
+
+    pub fn setAnalysis(self: *Fn, anal: Analysis) void {
+        switch (anal) {
+            .queued => |zir_ptr| {
+                self.bits.state = .queued;
+                self.data = .{ .zir = zir_ptr };
             },
-            .in_progress => {
-                std.debug.print("in_progress\n", .{});
+            .success => |body| {
+                self.bits.state = .success;
+                self.data = .{ .body = body };
             },
-            else => {
-                std.debug.print("\n", .{});
-                zir.dumpFn(mod, self);
+            .in_progress, .sema_failure, .dependency_failure => {
+                self.bits.state = anal;
+                self.data = .{ .none = {} };
             },
         }
     }
+
+    /// For debugging purposes.
+    pub fn dump(self: *Fn, mod: Module) void {
+        zir.dumpFn(mod, self);
+    }
 };
 
 pub const Var = struct {
@@ -773,13 +805,33 @@ pub const Scope = struct {
         instructions: ArrayListUnmanaged(*Inst),
         /// Points to the arena allocator of DeclAnalysis
         arena: *Allocator,
-        label: ?Label = null,
+        label: Label = Label.none,
         is_comptime: bool,
 
-        pub const Label = struct {
-            zir_block: *zir.Inst.Block,
-            results: ArrayListUnmanaged(*Inst),
-            block_inst: *Inst.Block,
+        pub const Label = union(enum) {
+            none,
+            /// This `Block` maps a block ZIR instruction to the corresponding
+            /// TZIR instruction for break instruction analysis.
+            breaking: struct {
+                zir_block: *zir.Inst.Block,
+                merges: Merges,
+            },
+            /// This `Block` indicates that an inline function call is happening
+            /// and return instructions should be analyzed as a break instruction
+            /// to this TZIR block instruction.
+            inlining: struct {
+                /// We use this to count from 0 so that arg instructions know
+                /// which parameter index they are, without having to store
+                /// a parameter index with each arg instruction.
+                param_index: usize,
+                casted_args: []*Inst,
+                merges: Merges,
+            },
+
+            pub const Merges = struct {
+                results: ArrayListUnmanaged(*Inst),
+                block_inst: *Inst.Block,
+            };
         };
 
         /// For debugging purposes.
@@ -1189,8 +1241,21 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool {
                 break :blk fn_zir;
             };
 
+            const is_inline = blk: {
+                if (fn_proto.getExternExportInlineToken()) |maybe_inline_token| {
+                    if (tree.token_ids[maybe_inline_token] == .Keyword_inline) {
+                        break :blk true;
+                    }
+                }
+                break :blk false;
+            };
+
             new_func.* = .{
-                .analysis = .{ .queued = fn_zir },
+                .bits = .{
+                    .state = .queued,
+                    .is_inline = is_inline,
+                },
+                .data = .{ .zir = fn_zir },
                 .owner_decl = decl,
             };
             fn_payload.* = .{
@@ -1199,11 +1264,16 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool {
             };
 
             var prev_type_has_bits = false;
+            var prev_is_inline = false;
             var type_changed = true;
 
             if (decl.typedValueManaged()) |tvm| {
                 prev_type_has_bits = tvm.typed_value.ty.hasCodeGenBits();
                 type_changed = !tvm.typed_value.ty.eql(fn_type);
+                if (tvm.typed_value.val.castTag(.function)) |payload| {
+                    const prev_func = payload.data;
+                    prev_is_inline = prev_func.bits.is_inline;
+                }
 
                 tvm.deinit(self.gpa);
             }
@@ -1221,18 +1291,26 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool {
             decl.analysis = .complete;
             decl.generation = self.generation;
 
-            if (fn_type.hasCodeGenBits()) {
+            if (!is_inline and fn_type.hasCodeGenBits()) {
                 // We don't fully codegen the decl until later, but we do need to reserve a global
                 // offset table index for it. This allows us to codegen decls out of dependency order,
                 // increasing how many computations can be done in parallel.
                 try self.comp.bin_file.allocateDeclIndexes(decl);
                 try self.comp.work_queue.writeItem(.{ .codegen_decl = decl });
-            } else if (prev_type_has_bits) {
+            } else if (!prev_is_inline and prev_type_has_bits) {
                 self.comp.bin_file.freeDecl(decl);
             }
 
             if (fn_proto.getExternExportInlineToken()) |maybe_export_token| {
                 if (tree.token_ids[maybe_export_token] == .Keyword_export) {
+                    if (is_inline) {
+                        return self.failTok(
+                            &block_scope.base,
+                            maybe_export_token,
+                            "export of inline function",
+                            .{},
+                        );
+                    }
                     const export_src = tree.token_locs[maybe_export_token].start;
                     const name_loc = tree.token_locs[fn_proto.getNameToken().?];
                     const name = tree.tokenSliceLoc(name_loc);
@@ -1240,7 +1318,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool {
                     try self.analyzeExport(&block_scope.base, export_src, name, decl);
                 }
             }
-            return type_changed;
+            return type_changed or is_inline != prev_is_inline;
         },
         .VarDecl => {
             const var_decl = @fieldParentPtr(ast.Node.VarDecl, "base", ast_node);
@@ -1824,15 +1902,15 @@ pub fn analyzeFnBody(self: *Module, decl: *Decl, func: *Fn) !void {
     };
     defer inner_block.instructions.deinit(self.gpa);
 
-    const fn_zir = func.analysis.queued;
+    const fn_zir = func.data.zir;
    defer fn_zir.arena.promote(self.gpa).deinit();
-    func.analysis = .{ .in_progress = {} };
+    func.setAnalysis(.in_progress);
     log.debug("set {s} to in_progress\n", .{decl.name});
 
     try zir_sema.analyzeBody(self, &inner_block.base, fn_zir.body);
 
     const instructions = try arena.allocator.dupe(*Inst, inner_block.instructions.items);
-    func.analysis = .{ .success = .{ .instructions = instructions } };
+    func.setAnalysis(.{ .success = .{ .instructions = instructions } });
     log.debug("set {s} to success\n", .{decl.name});
 }
 
@@ -2329,7 +2407,7 @@ pub fn analyzeDeclRef(self: *Module, scope: *Scope, src: usize, decl: *Decl) Inn
     self.ensureDeclAnalyzed(decl) catch |err| {
         if (scope.cast(Scope.Block)) |block| {
             if (block.func) |func| {
-                func.analysis = .dependency_failure;
+                func.setAnalysis(.dependency_failure);
             } else {
                 block.decl.analysis = .dependency_failure;
             }
@@ -3029,7 +3107,7 @@ fn failWithOwnedErrorMsg(self: *Module, scope: *Scope, src: usize, err_msg: *Com
         .block => {
             const block = scope.cast(Scope.Block).?;
             if (block.func) |func| {
-                func.analysis = .sema_failure;
+                func.setAnalysis(.sema_failure);
             } else {
                 block.decl.analysis = .sema_failure;
                 block.decl.generation = self.generation;
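
For readers following along, here is a minimal language-level sketch of the
behavior this commit implements. It is an illustration, not part of the
commit: the `inline` keyword placement and the "export of inline function"
error text are taken from the diff above, while the sample program itself is
assumed.

    // Calls to an inline function are evaluated in the frontend. The callee's
    // body is analyzed at the callsite inside a block instruction, and each
    // `ret` in the callee is treated as a `break` targeting that block.
    inline fn add(a: u32, b: u32) u32 {
        return a + b; // analyzed as a `break` carrying `a + b` to the callsite block
    }

    export fn entry() u32 {
        return add(1, 2); // inlined during semantic analysis; no runtime call
    }

    // The new compile error: an inline function is skipped by codegen (see the
    // `!is_inline and fn_type.hasCodeGenBits()` check above), so there is no
    // symbol to export.
    // export inline fn bad() u32 { return 0; } // error: export of inline function

A comptime call goes through the same machinery with the scope's is_comptime
flag set, which is why the commit message describes the two as implemented the
same way.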