about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorJakub Konka <kubkon@jakubkonka.com>2022-01-21 16:04:26 +0100
committerJakub Konka <kubkon@jakubkonka.com>2022-01-22 00:46:35 +0100
commit4a5e75245bc60f76a82bb6bcb70553bec7d7c4f2 (patch)
tree82750d896591ef9737c8e86ec0d8f847d026b901 /src
parentc9ae24503dc8da2e59f46619695bf4eb863fb3ac (diff)
downloadzig-4a5e75245bc60f76a82bb6bcb70553bec7d7c4f2.tar.gz
zig-4a5e75245bc60f76a82bb6bcb70553bec7d7c4f2.zip
stage2: clean up preserving callee regs on the stack
Instead of using `push` and `pop` combo, we now re-use our stack allocation mechanism which means we don't have to worry about 16-byte stack adjustments on macOS as it is handled automatically for us. Another benefit is that we don't have to backpatch stack offsets when pulling args from the stack.
Diffstat (limited to 'src')
-rw-r--r--src/arch/x86_64/CodeGen.zig104
-rw-r--r--src/arch/x86_64/Emit.zig34
-rw-r--r--src/arch/x86_64/Mir.zig7
-rw-r--r--src/arch/x86_64/PrintMir.zig40
4 files changed, 90 insertions, 95 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 522da6e25d..c204bcbad1 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -61,8 +61,6 @@ end_di_column: u32,
/// which is a relative jump, based on the address following the reloc.
exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
-stack_args_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
-
/// Whenever there is a runtime branch, we push a Branch onto this stack,
/// and pop it off when the runtime branch joins. This provides an "overlay"
/// of the table of mappings from instructions to `MCValue` from within the branch.
@@ -286,7 +284,6 @@ pub fn generate(
defer function.exitlude_jump_relocs.deinit(bin_file.allocator);
defer function.mir_instructions.deinit(bin_file.allocator);
defer function.mir_extra.deinit(bin_file.allocator);
- defer function.stack_args_relocs.deinit(bin_file.allocator);
defer if (builtin.mode == .Debug) function.mir_to_air_map.deinit();
var call_info = function.resolveCallingConventionValues(fn_type) catch |err| switch (err) {
@@ -378,13 +375,6 @@ pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 {
fn gen(self: *Self) InnerError!void {
const cc = self.fn_type.fnCallingConvention();
if (cc != .Naked) {
- // push the callee_preserved_regs that were used
- const backpatch_push_callee_preserved_regs_i = try self.addInst(.{
- .tag = .push_regs_from_callee_preserved_regs,
- .ops = undefined,
- .data = .{ .regs_to_push_or_pop = undefined }, // to be backpatched
- });
-
_ = try self.addInst(.{
.tag = .push,
.ops = (Mir.Ops{
@@ -416,6 +406,15 @@ fn gen(self: *Self) InnerError!void {
.data = undefined,
});
+ // push the callee_preserved_regs that were used
+ const backpatch_push_callee_preserved_regs_i = try self.addInst(.{
+ .tag = .push_regs_from_callee_preserved_regs,
+ .ops = (Mir.Ops{
+ .reg1 = .rbp,
+ }).encode(),
+ .data = .{ .payload = undefined }, // to be backpatched
+ });
+
try self.genBody(self.air.getMainBody());
// TODO can single exitlude jump reloc be elided? What if it is not at the end of the code?
@@ -429,6 +428,33 @@ fn gen(self: *Self) InnerError!void {
self.mir_instructions.items(.data)[jmp_reloc].inst = @intCast(u32, self.mir_instructions.len);
}
+ // calculate the data for callee_preserved_regs to be pushed and popped
+ const callee_preserved_regs_payload = blk: {
+ var data = Mir.RegsToPushOrPop{
+ .regs = 0,
+ .disp = mem.alignForwardGeneric(u32, self.next_stack_offset, 8),
+ };
+ inline for (callee_preserved_regs) |reg, i| {
+ if (self.register_manager.isRegAllocated(reg)) {
+ data.regs |= 1 << @intCast(u5, i);
+ self.max_end_stack += 8;
+ }
+ }
+ break :blk try self.addExtra(data);
+ };
+
+ const data = self.mir_instructions.items(.data);
+ // backpatch the push instruction
+ data[backpatch_push_callee_preserved_regs_i].payload = callee_preserved_regs_payload;
+ // pop the callee_preserved_regs
+ _ = try self.addInst(.{
+ .tag = .pop_regs_from_callee_preserved_regs,
+ .ops = (Mir.Ops{
+ .reg1 = .rbp,
+ }).encode(),
+ .data = .{ .payload = callee_preserved_regs_payload },
+ });
+
_ = try self.addInst(.{
.tag = .dbg_epilogue_begin,
.ops = undefined,
@@ -450,34 +476,6 @@ fn gen(self: *Self) InnerError!void {
.data = undefined,
});
- // calculate the data for callee_preserved_regs to be pushed and popped
- var callee_preserved_regs_push_data: u32 = 0x0;
- // TODO this is required on macOS since macOS actively checks for stack alignment
- // at every extern call site. As far as I can tell, macOS accounts for the typical
- // function prologue first 2 instructions of:
- // ...
- // push rbp
- // mov rsp, rbp
- // ...
- // Thus we don't need to adjust the stack for the first push instruction. However,
- // any subsequent push of values on the stack such as when preserving registers,
- // needs to be taken into account here.
- var stack_adjustment: u32 = 0;
- inline for (callee_preserved_regs) |reg, i| {
- if (self.register_manager.isRegAllocated(reg)) {
- callee_preserved_regs_push_data |= 1 << @intCast(u5, i);
- stack_adjustment += @divExact(reg.size(), 8);
- }
- }
- const data = self.mir_instructions.items(.data);
- // backpatch the push instruction
- data[backpatch_push_callee_preserved_regs_i].regs_to_push_or_pop = callee_preserved_regs_push_data;
- // pop the callee_preserved_regs
- _ = try self.addInst(.{
- .tag = .pop_regs_from_callee_preserved_regs,
- .ops = undefined,
- .data = .{ .regs_to_push_or_pop = callee_preserved_regs_push_data },
- });
_ = try self.addInst(.{
.tag = .ret,
.ops = (Mir.Ops{
@@ -488,36 +486,28 @@ fn gen(self: *Self) InnerError!void {
// Adjust the stack
const stack_end = self.max_end_stack;
- if (stack_end > math.maxInt(i32) - stack_adjustment) {
+ if (stack_end > math.maxInt(i32)) {
return self.failSymbol("too much stack used in call parameters", .{});
}
// TODO we should reuse this mechanism to align the stack when calling any function even if
// we do not pass any args on the stack BUT we still push regs to stack with `push` inst.
const aligned_stack_end = @intCast(u32, mem.alignForward(stack_end, self.stack_align));
- if (aligned_stack_end > 0 or (stack_adjustment > 0 and self.target.isDarwin())) {
- const imm = if (self.target.isDarwin()) aligned_stack_end + stack_adjustment else aligned_stack_end;
+ if (aligned_stack_end > 0) {
self.mir_instructions.set(backpatch_stack_sub, .{
.tag = .sub,
.ops = (Mir.Ops{
.reg1 = .rsp,
}).encode(),
- .data = .{ .imm = imm },
+ .data = .{ .imm = aligned_stack_end },
});
self.mir_instructions.set(backpatch_stack_add, .{
.tag = .add,
.ops = (Mir.Ops{
.reg1 = .rsp,
}).encode(),
- .data = .{ .imm = imm },
+ .data = .{ .imm = aligned_stack_end },
});
}
- while (self.stack_args_relocs.popOrNull()) |index| {
- // TODO like above, gotta figure out the alignment shenanigans for macOS, etc.
- const adjustment = if (self.target.isDarwin()) 2 * stack_adjustment else stack_adjustment;
- // +16 bytes to account for saved return address of the `call` instruction and
- // `push rbp`.
- self.mir_instructions.items(.data)[index].imm += adjustment + aligned_stack_end + 16;
- }
} else {
_ = try self.addInst(.{
.tag = .dbg_prologue_end,
@@ -2194,16 +2184,15 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void {
if (abi_size <= 8) {
const reg = try self.register_manager.allocReg(inst, &.{});
- const reloc = try self.addInst(.{
+ _ = try self.addInst(.{
.tag = .mov,
.ops = (Mir.Ops{
.reg1 = registerAlias(reg, @intCast(u32, abi_size)),
- .reg2 = .rsp,
+ .reg2 = .rbp,
.flags = 0b01,
}).encode(),
- .data = .{ .imm = off },
+ .data = .{ .imm = off + 16 },
});
- try self.stack_args_relocs.append(self.bin_file.allocator, reloc);
break :blk .{ .register = reg };
}
@@ -2217,15 +2206,14 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void {
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rcx, null);
- const reloc = try self.addInst(.{
+ _ = try self.addInst(.{
.tag = .lea,
.ops = (Mir.Ops{
.reg1 = addr_reg.to64(),
- .reg2 = .rsp,
+ .reg2 = .rbp,
}).encode(),
- .data = .{ .imm = off },
+ .data = .{ .imm = off + 16 },
});
- try self.stack_args_relocs.append(self.bin_file.allocator, reloc);
// TODO allow for abi_size to be u64
try self.genSetReg(Type.initTag(.u32), count_reg, .{ .immediate = @intCast(u32, abi_size) });
diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig
index 4ec80dd1ba..caaddb2e73 100644
--- a/src/arch/x86_64/Emit.zig
+++ b/src/arch/x86_64/Emit.zig
@@ -251,23 +251,25 @@ fn mirPushPop(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
}
}
fn mirPushPopRegsFromCalleePreservedRegs(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
- const callee_preserved_regs = bits.callee_preserved_regs;
- const regs = emit.mir.instructions.items(.data)[inst].regs_to_push_or_pop;
- if (tag == .push) {
- for (callee_preserved_regs) |reg, i| {
- if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
- lowerToOEnc(.push, reg, emit.code) catch |err|
- return emit.failWithLoweringError(err);
- }
- } else {
- // pop in the reverse direction
- var i = callee_preserved_regs.len;
- while (i > 0) : (i -= 1) {
- const reg = callee_preserved_regs[i - 1];
- if ((regs >> @intCast(u5, i - 1)) & 1 == 0) continue;
- lowerToOEnc(.pop, reg, emit.code) catch |err|
- return emit.failWithLoweringError(err);
+ const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+ const payload = emit.mir.instructions.items(.data)[inst].payload;
+ const data = emit.mir.extraData(Mir.RegsToPushOrPop, payload).data;
+ const regs = data.regs;
+ var disp: u32 = data.disp + 8;
+ for (bits.callee_preserved_regs) |reg, i| {
+ if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
+ if (tag == .push) {
+ lowerToMrEnc(.mov, RegisterOrMemory.mem(.qword_ptr, .{
+ .disp = @bitCast(u32, -@intCast(i32, disp)),
+ .base = ops.reg1,
+ }), reg.to64(), emit.code) catch |err| return emit.failWithLoweringError(err);
+ } else {
+ lowerToRmEnc(.mov, reg.to64(), RegisterOrMemory.mem(.qword_ptr, .{
+ .disp = @bitCast(u32, -@intCast(i32, disp)),
+ .base = ops.reg1,
+ }), emit.code) catch |err| return emit.failWithLoweringError(err);
}
+ disp += 8;
}
}
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index 7bab6ce39b..e3b79a8329 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -333,8 +333,6 @@ pub const Inst = struct {
got_entry: u32,
/// Index into `extra`. Meaning of what can be found there is context-dependent.
payload: u32,
- /// A bitfield of which callee_preserved_regs to push
- regs_to_push_or_pop: u32,
};
// Make sure we don't accidentally make instructions bigger than expected.
@@ -346,6 +344,11 @@ pub const Inst = struct {
}
};
+pub const RegsToPushOrPop = struct {
+ regs: u32,
+ disp: u32,
+};
+
pub const ImmPair = struct {
dest_off: u32,
operand: u32,
diff --git a/src/arch/x86_64/PrintMir.zig b/src/arch/x86_64/PrintMir.zig
index 67ebb2aa58..a7a4666f77 100644
--- a/src/arch/x86_64/PrintMir.zig
+++ b/src/arch/x86_64/PrintMir.zig
@@ -180,26 +180,28 @@ fn mirPushPop(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: a
try w.writeByte('\n');
}
fn mirPushPopRegsFromCalleePreservedRegs(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: anytype) !void {
- const callee_preserved_regs = bits.callee_preserved_regs;
- // PUSH/POP reg
-
- const regs = print.mir.instructions.items(.data)[inst].regs_to_push_or_pop;
- if (regs == 0) return w.writeAll("push/pop no regs from callee_preserved_regs\n");
- if (tag == .push) {
- try w.writeAll("push ");
- for (callee_preserved_regs) |reg, i| {
- if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
- try w.print("{s}, ", .{@tagName(reg)});
- }
- } else {
- // pop in the reverse direction
- var i = callee_preserved_regs.len;
- try w.writeAll("pop ");
- while (i > 0) : (i -= 1) {
- if ((regs >> @intCast(u5, i - 1)) & 1 == 0) continue;
- const reg = callee_preserved_regs[i - 1];
- try w.print("{s}, ", .{@tagName(reg)});
+ const ops = Mir.Ops.decode(print.mir.instructions.items(.ops)[inst]);
+ const payload = print.mir.instructions.items(.data)[inst].payload;
+ const data = print.mir.extraData(Mir.RegsToPushOrPop, payload).data;
+ const regs = data.regs;
+ var disp: u32 = data.disp + 8;
+ if (regs == 0) return w.writeAll("no regs from callee_preserved_regs\n");
+ for (bits.callee_preserved_regs) |reg, i| {
+ if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
+ if (tag == .push) {
+ try w.print("mov qword ptr [{s} + {d}], {s}", .{
+ @tagName(ops.reg1),
+ @bitCast(u32, -@intCast(i32, disp)),
+ @tagName(reg.to64()),
+ });
+ } else {
+ try w.print("mov {s}, qword ptr [{s} + {d}]", .{
+ @tagName(reg.to64()),
+ @tagName(ops.reg1),
+ @bitCast(u32, -@intCast(i32, disp)),
+ });
}
+ disp += 8;
}
try w.writeByte('\n');
}