aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Konka <kubkon@jakubkonka.com>2022-01-21 16:04:26 +0100
committerJakub Konka <kubkon@jakubkonka.com>2022-01-22 00:46:35 +0100
commit4a5e75245bc60f76a82bb6bcb70553bec7d7c4f2 (patch)
tree82750d896591ef9737c8e86ec0d8f847d026b901
parentc9ae24503dc8da2e59f46619695bf4eb863fb3ac (diff)
downloadzig-4a5e75245bc60f76a82bb6bcb70553bec7d7c4f2.tar.gz
zig-4a5e75245bc60f76a82bb6bcb70553bec7d7c4f2.zip
stage2: clean up preserving callee regs on the stack
Instead of using `push` and `pop` combo, we now re-use our stack allocation mechanism which means we don't have to worry about 16-byte stack adjustments on macOS as it is handled automatically for us. Another benefit is that we don't have to backpatch stack offsets when pulling args from the stack.
-rw-r--r--src/arch/x86_64/CodeGen.zig104
-rw-r--r--src/arch/x86_64/Emit.zig34
-rw-r--r--src/arch/x86_64/Mir.zig7
-rw-r--r--src/arch/x86_64/PrintMir.zig40
4 files changed, 90 insertions, 95 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 522da6e25d..c204bcbad1 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -61,8 +61,6 @@ end_di_column: u32,
/// which is a relative jump, based on the address following the reloc.
exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
-stack_args_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
-
/// Whenever there is a runtime branch, we push a Branch onto this stack,
/// and pop it off when the runtime branch joins. This provides an "overlay"
/// of the table of mappings from instructions to `MCValue` from within the branch.
@@ -286,7 +284,6 @@ pub fn generate(
defer function.exitlude_jump_relocs.deinit(bin_file.allocator);
defer function.mir_instructions.deinit(bin_file.allocator);
defer function.mir_extra.deinit(bin_file.allocator);
- defer function.stack_args_relocs.deinit(bin_file.allocator);
defer if (builtin.mode == .Debug) function.mir_to_air_map.deinit();
var call_info = function.resolveCallingConventionValues(fn_type) catch |err| switch (err) {
@@ -378,13 +375,6 @@ pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 {
fn gen(self: *Self) InnerError!void {
const cc = self.fn_type.fnCallingConvention();
if (cc != .Naked) {
- // push the callee_preserved_regs that were used
- const backpatch_push_callee_preserved_regs_i = try self.addInst(.{
- .tag = .push_regs_from_callee_preserved_regs,
- .ops = undefined,
- .data = .{ .regs_to_push_or_pop = undefined }, // to be backpatched
- });
-
_ = try self.addInst(.{
.tag = .push,
.ops = (Mir.Ops{
@@ -416,6 +406,15 @@ fn gen(self: *Self) InnerError!void {
.data = undefined,
});
+ // push the callee_preserved_regs that were used
+ const backpatch_push_callee_preserved_regs_i = try self.addInst(.{
+ .tag = .push_regs_from_callee_preserved_regs,
+ .ops = (Mir.Ops{
+ .reg1 = .rbp,
+ }).encode(),
+ .data = .{ .payload = undefined }, // to be backpatched
+ });
+
try self.genBody(self.air.getMainBody());
// TODO can single exitlude jump reloc be elided? What if it is not at the end of the code?
@@ -429,6 +428,33 @@ fn gen(self: *Self) InnerError!void {
self.mir_instructions.items(.data)[jmp_reloc].inst = @intCast(u32, self.mir_instructions.len);
}
+ // calculate the data for callee_preserved_regs to be pushed and popped
+ const callee_preserved_regs_payload = blk: {
+ var data = Mir.RegsToPushOrPop{
+ .regs = 0,
+ .disp = mem.alignForwardGeneric(u32, self.next_stack_offset, 8),
+ };
+ inline for (callee_preserved_regs) |reg, i| {
+ if (self.register_manager.isRegAllocated(reg)) {
+ data.regs |= 1 << @intCast(u5, i);
+ self.max_end_stack += 8;
+ }
+ }
+ break :blk try self.addExtra(data);
+ };
+
+ const data = self.mir_instructions.items(.data);
+ // backpatch the push instruction
+ data[backpatch_push_callee_preserved_regs_i].payload = callee_preserved_regs_payload;
+ // pop the callee_preserved_regs
+ _ = try self.addInst(.{
+ .tag = .pop_regs_from_callee_preserved_regs,
+ .ops = (Mir.Ops{
+ .reg1 = .rbp,
+ }).encode(),
+ .data = .{ .payload = callee_preserved_regs_payload },
+ });
+
_ = try self.addInst(.{
.tag = .dbg_epilogue_begin,
.ops = undefined,
@@ -450,34 +476,6 @@ fn gen(self: *Self) InnerError!void {
.data = undefined,
});
- // calculate the data for callee_preserved_regs to be pushed and popped
- var callee_preserved_regs_push_data: u32 = 0x0;
- // TODO this is required on macOS since macOS actively checks for stack alignment
- // at every extern call site. As far as I can tell, macOS accounts for the typical
- // function prologue first 2 instructions of:
- // ...
- // push rbp
- // mov rsp, rbp
- // ...
- // Thus we don't need to adjust the stack for the first push instruction. However,
- // any subsequent push of values on the stack such as when preserving registers,
- // needs to be taken into account here.
- var stack_adjustment: u32 = 0;
- inline for (callee_preserved_regs) |reg, i| {
- if (self.register_manager.isRegAllocated(reg)) {
- callee_preserved_regs_push_data |= 1 << @intCast(u5, i);
- stack_adjustment += @divExact(reg.size(), 8);
- }
- }
- const data = self.mir_instructions.items(.data);
- // backpatch the push instruction
- data[backpatch_push_callee_preserved_regs_i].regs_to_push_or_pop = callee_preserved_regs_push_data;
- // pop the callee_preserved_regs
- _ = try self.addInst(.{
- .tag = .pop_regs_from_callee_preserved_regs,
- .ops = undefined,
- .data = .{ .regs_to_push_or_pop = callee_preserved_regs_push_data },
- });
_ = try self.addInst(.{
.tag = .ret,
.ops = (Mir.Ops{
@@ -488,36 +486,28 @@ fn gen(self: *Self) InnerError!void {
// Adjust the stack
const stack_end = self.max_end_stack;
- if (stack_end > math.maxInt(i32) - stack_adjustment) {
+ if (stack_end > math.maxInt(i32)) {
return self.failSymbol("too much stack used in call parameters", .{});
}
// TODO we should reuse this mechanism to align the stack when calling any function even if
// we do not pass any args on the stack BUT we still push regs to stack with `push` inst.
const aligned_stack_end = @intCast(u32, mem.alignForward(stack_end, self.stack_align));
- if (aligned_stack_end > 0 or (stack_adjustment > 0 and self.target.isDarwin())) {
- const imm = if (self.target.isDarwin()) aligned_stack_end + stack_adjustment else aligned_stack_end;
+ if (aligned_stack_end > 0) {
self.mir_instructions.set(backpatch_stack_sub, .{
.tag = .sub,
.ops = (Mir.Ops{
.reg1 = .rsp,
}).encode(),
- .data = .{ .imm = imm },
+ .data = .{ .imm = aligned_stack_end },
});
self.mir_instructions.set(backpatch_stack_add, .{
.tag = .add,
.ops = (Mir.Ops{
.reg1 = .rsp,
}).encode(),
- .data = .{ .imm = imm },
+ .data = .{ .imm = aligned_stack_end },
});
}
- while (self.stack_args_relocs.popOrNull()) |index| {
- // TODO like above, gotta figure out the alignment shenanigans for macOS, etc.
- const adjustment = if (self.target.isDarwin()) 2 * stack_adjustment else stack_adjustment;
- // +16 bytes to account for saved return address of the `call` instruction and
- // `push rbp`.
- self.mir_instructions.items(.data)[index].imm += adjustment + aligned_stack_end + 16;
- }
} else {
_ = try self.addInst(.{
.tag = .dbg_prologue_end,
@@ -2194,16 +2184,15 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void {
if (abi_size <= 8) {
const reg = try self.register_manager.allocReg(inst, &.{});
- const reloc = try self.addInst(.{
+ _ = try self.addInst(.{
.tag = .mov,
.ops = (Mir.Ops{
.reg1 = registerAlias(reg, @intCast(u32, abi_size)),
- .reg2 = .rsp,
+ .reg2 = .rbp,
.flags = 0b01,
}).encode(),
- .data = .{ .imm = off },
+ .data = .{ .imm = off + 16 },
});
- try self.stack_args_relocs.append(self.bin_file.allocator, reloc);
break :blk .{ .register = reg };
}
@@ -2217,15 +2206,14 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void {
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rcx, null);
- const reloc = try self.addInst(.{
+ _ = try self.addInst(.{
.tag = .lea,
.ops = (Mir.Ops{
.reg1 = addr_reg.to64(),
- .reg2 = .rsp,
+ .reg2 = .rbp,
}).encode(),
- .data = .{ .imm = off },
+ .data = .{ .imm = off + 16 },
});
- try self.stack_args_relocs.append(self.bin_file.allocator, reloc);
// TODO allow for abi_size to be u64
try self.genSetReg(Type.initTag(.u32), count_reg, .{ .immediate = @intCast(u32, abi_size) });
diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig
index 4ec80dd1ba..caaddb2e73 100644
--- a/src/arch/x86_64/Emit.zig
+++ b/src/arch/x86_64/Emit.zig
@@ -251,23 +251,25 @@ fn mirPushPop(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
}
}
fn mirPushPopRegsFromCalleePreservedRegs(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
- const callee_preserved_regs = bits.callee_preserved_regs;
- const regs = emit.mir.instructions.items(.data)[inst].regs_to_push_or_pop;
- if (tag == .push) {
- for (callee_preserved_regs) |reg, i| {
- if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
- lowerToOEnc(.push, reg, emit.code) catch |err|
- return emit.failWithLoweringError(err);
- }
- } else {
- // pop in the reverse direction
- var i = callee_preserved_regs.len;
- while (i > 0) : (i -= 1) {
- const reg = callee_preserved_regs[i - 1];
- if ((regs >> @intCast(u5, i - 1)) & 1 == 0) continue;
- lowerToOEnc(.pop, reg, emit.code) catch |err|
- return emit.failWithLoweringError(err);
+ const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+ const payload = emit.mir.instructions.items(.data)[inst].payload;
+ const data = emit.mir.extraData(Mir.RegsToPushOrPop, payload).data;
+ const regs = data.regs;
+ var disp: u32 = data.disp + 8;
+ for (bits.callee_preserved_regs) |reg, i| {
+ if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
+ if (tag == .push) {
+ lowerToMrEnc(.mov, RegisterOrMemory.mem(.qword_ptr, .{
+ .disp = @bitCast(u32, -@intCast(i32, disp)),
+ .base = ops.reg1,
+ }), reg.to64(), emit.code) catch |err| return emit.failWithLoweringError(err);
+ } else {
+ lowerToRmEnc(.mov, reg.to64(), RegisterOrMemory.mem(.qword_ptr, .{
+ .disp = @bitCast(u32, -@intCast(i32, disp)),
+ .base = ops.reg1,
+ }), emit.code) catch |err| return emit.failWithLoweringError(err);
}
+ disp += 8;
}
}
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index 7bab6ce39b..e3b79a8329 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -333,8 +333,6 @@ pub const Inst = struct {
got_entry: u32,
/// Index into `extra`. Meaning of what can be found there is context-dependent.
payload: u32,
- /// A bitfield of which callee_preserved_regs to push
- regs_to_push_or_pop: u32,
};
// Make sure we don't accidentally make instructions bigger than expected.
@@ -346,6 +344,11 @@ pub const Inst = struct {
}
};
+pub const RegsToPushOrPop = struct {
+ regs: u32,
+ disp: u32,
+};
+
pub const ImmPair = struct {
dest_off: u32,
operand: u32,
diff --git a/src/arch/x86_64/PrintMir.zig b/src/arch/x86_64/PrintMir.zig
index 67ebb2aa58..a7a4666f77 100644
--- a/src/arch/x86_64/PrintMir.zig
+++ b/src/arch/x86_64/PrintMir.zig
@@ -180,26 +180,28 @@ fn mirPushPop(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: a
try w.writeByte('\n');
}
fn mirPushPopRegsFromCalleePreservedRegs(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: anytype) !void {
- const callee_preserved_regs = bits.callee_preserved_regs;
- // PUSH/POP reg
-
- const regs = print.mir.instructions.items(.data)[inst].regs_to_push_or_pop;
- if (regs == 0) return w.writeAll("push/pop no regs from callee_preserved_regs\n");
- if (tag == .push) {
- try w.writeAll("push ");
- for (callee_preserved_regs) |reg, i| {
- if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
- try w.print("{s}, ", .{@tagName(reg)});
- }
- } else {
- // pop in the reverse direction
- var i = callee_preserved_regs.len;
- try w.writeAll("pop ");
- while (i > 0) : (i -= 1) {
- if ((regs >> @intCast(u5, i - 1)) & 1 == 0) continue;
- const reg = callee_preserved_regs[i - 1];
- try w.print("{s}, ", .{@tagName(reg)});
+ const ops = Mir.Ops.decode(print.mir.instructions.items(.ops)[inst]);
+ const payload = print.mir.instructions.items(.data)[inst].payload;
+ const data = print.mir.extraData(Mir.RegsToPushOrPop, payload).data;
+ const regs = data.regs;
+ var disp: u32 = data.disp + 8;
+ if (regs == 0) return w.writeAll("no regs from callee_preserved_regs\n");
+ for (bits.callee_preserved_regs) |reg, i| {
+ if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
+ if (tag == .push) {
+ try w.print("mov qword ptr [{s} + {d}], {s}", .{
+ @tagName(ops.reg1),
+ @bitCast(u32, -@intCast(i32, disp)),
+ @tagName(reg.to64()),
+ });
+ } else {
+ try w.print("mov {s}, qword ptr [{s} + {d}]", .{
+ @tagName(reg.to64()),
+ @tagName(ops.reg1),
+ @bitCast(u32, -@intCast(i32, disp)),
+ });
}
+ disp += 8;
}
try w.writeByte('\n');
}