diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-01-21 16:04:26 +0100 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2022-01-22 00:46:35 +0100 |
| commit | 4a5e75245bc60f76a82bb6bcb70553bec7d7c4f2 (patch) | |
| tree | 82750d896591ef9737c8e86ec0d8f847d026b901 | |
| parent | c9ae24503dc8da2e59f46619695bf4eb863fb3ac (diff) | |
| download | zig-4a5e75245bc60f76a82bb6bcb70553bec7d7c4f2.tar.gz zig-4a5e75245bc60f76a82bb6bcb70553bec7d7c4f2.zip | |
stage2: clean up preserving callee regs on the stack
Instead of using `push` and `pop` combo, we now re-use our stack
allocation mechanism which means we don't have to worry about
16-byte stack adjustments on macOS as it is handled automatically
for us. Another benefit is that we don't have to backpatch stack
offsets when pulling args from the stack.
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 104 | ||||
| -rw-r--r-- | src/arch/x86_64/Emit.zig | 34 | ||||
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 7 | ||||
| -rw-r--r-- | src/arch/x86_64/PrintMir.zig | 40 |
4 files changed, 90 insertions, 95 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 522da6e25d..c204bcbad1 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -61,8 +61,6 @@ end_di_column: u32, /// which is a relative jump, based on the address following the reloc. exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{}, -stack_args_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{}, - /// Whenever there is a runtime branch, we push a Branch onto this stack, /// and pop it off when the runtime branch joins. This provides an "overlay" /// of the table of mappings from instructions to `MCValue` from within the branch. @@ -286,7 +284,6 @@ pub fn generate( defer function.exitlude_jump_relocs.deinit(bin_file.allocator); defer function.mir_instructions.deinit(bin_file.allocator); defer function.mir_extra.deinit(bin_file.allocator); - defer function.stack_args_relocs.deinit(bin_file.allocator); defer if (builtin.mode == .Debug) function.mir_to_air_map.deinit(); var call_info = function.resolveCallingConventionValues(fn_type) catch |err| switch (err) { @@ -378,13 +375,6 @@ pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { fn gen(self: *Self) InnerError!void { const cc = self.fn_type.fnCallingConvention(); if (cc != .Naked) { - // push the callee_preserved_regs that were used - const backpatch_push_callee_preserved_regs_i = try self.addInst(.{ - .tag = .push_regs_from_callee_preserved_regs, - .ops = undefined, - .data = .{ .regs_to_push_or_pop = undefined }, // to be backpatched - }); - _ = try self.addInst(.{ .tag = .push, .ops = (Mir.Ops{ @@ -416,6 +406,15 @@ fn gen(self: *Self) InnerError!void { .data = undefined, }); + // push the callee_preserved_regs that were used + const backpatch_push_callee_preserved_regs_i = try self.addInst(.{ + .tag = .push_regs_from_callee_preserved_regs, + .ops = (Mir.Ops{ + .reg1 = .rbp, + }).encode(), + .data = .{ .payload = undefined }, // to be backpatched + }); + try self.genBody(self.air.getMainBody()); // TODO can single exitlude jump reloc be elided? What if it is not at the end of the code? @@ -429,6 +428,33 @@ fn gen(self: *Self) InnerError!void { self.mir_instructions.items(.data)[jmp_reloc].inst = @intCast(u32, self.mir_instructions.len); } + // calculate the data for callee_preserved_regs to be pushed and popped + const callee_preserved_regs_payload = blk: { + var data = Mir.RegsToPushOrPop{ + .regs = 0, + .disp = mem.alignForwardGeneric(u32, self.next_stack_offset, 8), + }; + inline for (callee_preserved_regs) |reg, i| { + if (self.register_manager.isRegAllocated(reg)) { + data.regs |= 1 << @intCast(u5, i); + self.max_end_stack += 8; + } + } + break :blk try self.addExtra(data); + }; + + const data = self.mir_instructions.items(.data); + // backpatch the push instruction + data[backpatch_push_callee_preserved_regs_i].payload = callee_preserved_regs_payload; + // pop the callee_preserved_regs + _ = try self.addInst(.{ + .tag = .pop_regs_from_callee_preserved_regs, + .ops = (Mir.Ops{ + .reg1 = .rbp, + }).encode(), + .data = .{ .payload = callee_preserved_regs_payload }, + }); + _ = try self.addInst(.{ .tag = .dbg_epilogue_begin, .ops = undefined, @@ -450,34 +476,6 @@ fn gen(self: *Self) InnerError!void { .data = undefined, }); - // calculate the data for callee_preserved_regs to be pushed and popped - var callee_preserved_regs_push_data: u32 = 0x0; - // TODO this is required on macOS since macOS actively checks for stack alignment - // at every extern call site. As far as I can tell, macOS accounts for the typical - // function prologue first 2 instructions of: - // ... - // push rbp - // mov rsp, rbp - // ... - // Thus we don't need to adjust the stack for the first push instruction. However, - // any subsequent push of values on the stack such as when preserving registers, - // needs to be taken into account here. - var stack_adjustment: u32 = 0; - inline for (callee_preserved_regs) |reg, i| { - if (self.register_manager.isRegAllocated(reg)) { - callee_preserved_regs_push_data |= 1 << @intCast(u5, i); - stack_adjustment += @divExact(reg.size(), 8); - } - } - const data = self.mir_instructions.items(.data); - // backpatch the push instruction - data[backpatch_push_callee_preserved_regs_i].regs_to_push_or_pop = callee_preserved_regs_push_data; - // pop the callee_preserved_regs - _ = try self.addInst(.{ - .tag = .pop_regs_from_callee_preserved_regs, - .ops = undefined, - .data = .{ .regs_to_push_or_pop = callee_preserved_regs_push_data }, - }); _ = try self.addInst(.{ .tag = .ret, .ops = (Mir.Ops{ @@ -488,36 +486,28 @@ fn gen(self: *Self) InnerError!void { // Adjust the stack const stack_end = self.max_end_stack; - if (stack_end > math.maxInt(i32) - stack_adjustment) { + if (stack_end > math.maxInt(i32)) { return self.failSymbol("too much stack used in call parameters", .{}); } // TODO we should reuse this mechanism to align the stack when calling any function even if // we do not pass any args on the stack BUT we still push regs to stack with `push` inst. const aligned_stack_end = @intCast(u32, mem.alignForward(stack_end, self.stack_align)); - if (aligned_stack_end > 0 or (stack_adjustment > 0 and self.target.isDarwin())) { - const imm = if (self.target.isDarwin()) aligned_stack_end + stack_adjustment else aligned_stack_end; + if (aligned_stack_end > 0) { self.mir_instructions.set(backpatch_stack_sub, .{ .tag = .sub, .ops = (Mir.Ops{ .reg1 = .rsp, }).encode(), - .data = .{ .imm = imm }, + .data = .{ .imm = aligned_stack_end }, }); self.mir_instructions.set(backpatch_stack_add, .{ .tag = .add, .ops = (Mir.Ops{ .reg1 = .rsp, }).encode(), - .data = .{ .imm = imm }, + .data = .{ .imm = aligned_stack_end }, }); } - while (self.stack_args_relocs.popOrNull()) |index| { - // TODO like above, gotta figure out the alignment shenanigans for macOS, etc. - const adjustment = if (self.target.isDarwin()) 2 * stack_adjustment else stack_adjustment; - // +16 bytes to account for saved return address of the `call` instruction and - // `push rbp`. - self.mir_instructions.items(.data)[index].imm += adjustment + aligned_stack_end + 16; - } } else { _ = try self.addInst(.{ .tag = .dbg_prologue_end, @@ -2194,16 +2184,15 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { if (abi_size <= 8) { const reg = try self.register_manager.allocReg(inst, &.{}); - const reloc = try self.addInst(.{ + _ = try self.addInst(.{ .tag = .mov, .ops = (Mir.Ops{ .reg1 = registerAlias(reg, @intCast(u32, abi_size)), - .reg2 = .rsp, + .reg2 = .rbp, .flags = 0b01, }).encode(), - .data = .{ .imm = off }, + .data = .{ .imm = off + 16 }, }); - try self.stack_args_relocs.append(self.bin_file.allocator, reloc); break :blk .{ .register = reg }; } @@ -2217,15 +2206,14 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { try self.register_manager.getReg(.rax, null); try self.register_manager.getReg(.rcx, null); - const reloc = try self.addInst(.{ + _ = try self.addInst(.{ .tag = .lea, .ops = (Mir.Ops{ .reg1 = addr_reg.to64(), - .reg2 = .rsp, + .reg2 = .rbp, }).encode(), - .data = .{ .imm = off }, + .data = .{ .imm = off + 16 }, }); - try self.stack_args_relocs.append(self.bin_file.allocator, reloc); // TODO allow for abi_size to be u64 try self.genSetReg(Type.initTag(.u32), count_reg, .{ .immediate = @intCast(u32, abi_size) }); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 4ec80dd1ba..caaddb2e73 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -251,23 +251,25 @@ fn mirPushPop(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { } } fn mirPushPopRegsFromCalleePreservedRegs(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { - const callee_preserved_regs = bits.callee_preserved_regs; - const regs = emit.mir.instructions.items(.data)[inst].regs_to_push_or_pop; - if (tag == .push) { - for (callee_preserved_regs) |reg, i| { - if ((regs >> @intCast(u5, i)) & 1 == 0) continue; - lowerToOEnc(.push, reg, emit.code) catch |err| - return emit.failWithLoweringError(err); - } - } else { - // pop in the reverse direction - var i = callee_preserved_regs.len; - while (i > 0) : (i -= 1) { - const reg = callee_preserved_regs[i - 1]; - if ((regs >> @intCast(u5, i - 1)) & 1 == 0) continue; - lowerToOEnc(.pop, reg, emit.code) catch |err| - return emit.failWithLoweringError(err); + const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]); + const payload = emit.mir.instructions.items(.data)[inst].payload; + const data = emit.mir.extraData(Mir.RegsToPushOrPop, payload).data; + const regs = data.regs; + var disp: u32 = data.disp + 8; + for (bits.callee_preserved_regs) |reg, i| { + if ((regs >> @intCast(u5, i)) & 1 == 0) continue; + if (tag == .push) { + lowerToMrEnc(.mov, RegisterOrMemory.mem(.qword_ptr, .{ + .disp = @bitCast(u32, -@intCast(i32, disp)), + .base = ops.reg1, + }), reg.to64(), emit.code) catch |err| return emit.failWithLoweringError(err); + } else { + lowerToRmEnc(.mov, reg.to64(), RegisterOrMemory.mem(.qword_ptr, .{ + .disp = @bitCast(u32, -@intCast(i32, disp)), + .base = ops.reg1, + }), emit.code) catch |err| return emit.failWithLoweringError(err); } + disp += 8; } } diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 7bab6ce39b..e3b79a8329 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -333,8 +333,6 @@ pub const Inst = struct { got_entry: u32, /// Index into `extra`. Meaning of what can be found there is context-dependent. payload: u32, - /// A bitfield of which callee_preserved_regs to push - regs_to_push_or_pop: u32, }; // Make sure we don't accidentally make instructions bigger than expected. @@ -346,6 +344,11 @@ pub const Inst = struct { } }; +pub const RegsToPushOrPop = struct { + regs: u32, + disp: u32, +}; + pub const ImmPair = struct { dest_off: u32, operand: u32, diff --git a/src/arch/x86_64/PrintMir.zig b/src/arch/x86_64/PrintMir.zig index 67ebb2aa58..a7a4666f77 100644 --- a/src/arch/x86_64/PrintMir.zig +++ b/src/arch/x86_64/PrintMir.zig @@ -180,26 +180,28 @@ fn mirPushPop(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: a try w.writeByte('\n'); } fn mirPushPopRegsFromCalleePreservedRegs(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: anytype) !void { - const callee_preserved_regs = bits.callee_preserved_regs; - // PUSH/POP reg - - const regs = print.mir.instructions.items(.data)[inst].regs_to_push_or_pop; - if (regs == 0) return w.writeAll("push/pop no regs from callee_preserved_regs\n"); - if (tag == .push) { - try w.writeAll("push "); - for (callee_preserved_regs) |reg, i| { - if ((regs >> @intCast(u5, i)) & 1 == 0) continue; - try w.print("{s}, ", .{@tagName(reg)}); - } - } else { - // pop in the reverse direction - var i = callee_preserved_regs.len; - try w.writeAll("pop "); - while (i > 0) : (i -= 1) { - if ((regs >> @intCast(u5, i - 1)) & 1 == 0) continue; - const reg = callee_preserved_regs[i - 1]; - try w.print("{s}, ", .{@tagName(reg)}); + const ops = Mir.Ops.decode(print.mir.instructions.items(.ops)[inst]); + const payload = print.mir.instructions.items(.data)[inst].payload; + const data = print.mir.extraData(Mir.RegsToPushOrPop, payload).data; + const regs = data.regs; + var disp: u32 = data.disp + 8; + if (regs == 0) return w.writeAll("no regs from callee_preserved_regs\n"); + for (bits.callee_preserved_regs) |reg, i| { + if ((regs >> @intCast(u5, i)) & 1 == 0) continue; + if (tag == .push) { + try w.print("mov qword ptr [{s} + {d}], {s}", .{ + @tagName(ops.reg1), + @bitCast(u32, -@intCast(i32, disp)), + @tagName(reg.to64()), + }); + } else { + try w.print("mov {s}, qword ptr [{s} + {d}]", .{ + @tagName(reg.to64()), + @tagName(ops.reg1), + @bitCast(u32, -@intCast(i32, disp)), + }); } + disp += 8; } try w.writeByte('\n'); } |
