diff options
| field | value | date |
|---|---|---|
| author | Jacob Young <jacobly0@users.noreply.github.com> | 2023-05-12 02:11:37 -0400 |
| committer | Jacob Young <jacobly0@users.noreply.github.com> | 2023-05-15 03:07:51 -0400 |
| commit | f83ebd8e6c95cf37d498936cae26d3a743cddc7f (patch) | |
| tree | 79c213371dcb42883fb1b3f625b39336db4023a5 /src | |
| parent | 3681da25f865d499cffe923b7f0721cf759d3591 (diff) | |
| download | zig-f83ebd8e6c95cf37d498936cae26d3a743cddc7f.tar.gz zig-f83ebd8e6c95cf37d498936cae26d3a743cddc7f.zip | |
x86_64: implement stack probing
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 63 |
| -rw-r--r-- | src/arch/x86_64/Lower.zig | 97 |
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 12 |
3 files changed, 163 insertions, 9 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 4f5bf89989..523faa5cb2 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1550,7 +1550,9 @@ fn gen(self: *Self) InnerError!void { const backpatch_push_callee_preserved_regs = try self.asmPlaceholder(); try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp); const backpatch_frame_align = try self.asmPlaceholder(); + const backpatch_frame_align_extra = try self.asmPlaceholder(); const backpatch_stack_alloc = try self.asmPlaceholder(); + const backpatch_stack_alloc_extra = try self.asmPlaceholder(); switch (self.ret_mcv.long) { .none, .unreach => {}, @@ -1599,24 +1601,67 @@ fn gen(self: *Self) InnerError!void { const need_stack_adjust = frame_layout.stack_adjust > 0; const need_save_reg = frame_layout.save_reg_list.count() > 0; if (need_frame_align) { + const page_align = @as(u32, math.maxInt(u32)) << 12; self.mir_instructions.set(backpatch_frame_align, .{ .tag = .@"and", .ops = .ri_s, .data = .{ .ri = .{ .r1 = .rsp, - .i = frame_layout.stack_mask, + .i = @max(frame_layout.stack_mask, page_align), } }, }); + if (frame_layout.stack_mask < page_align) { + self.mir_instructions.set(backpatch_frame_align_extra, .{ + .tag = .pseudo, + .ops = .pseudo_probe_align_ri_s, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = ~frame_layout.stack_mask & page_align, + } }, + }); + } } if (need_stack_adjust) { - self.mir_instructions.set(backpatch_stack_alloc, .{ - .tag = .sub, - .ops = .ri_s, - .data = .{ .ri = .{ - .r1 = .rsp, - .i = frame_layout.stack_adjust, - } }, - }); + const page_size: u32 = 1 << 12; + if (frame_layout.stack_adjust <= page_size) { + self.mir_instructions.set(backpatch_stack_alloc, .{ + .tag = .sub, + .ops = .ri_s, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_adjust, + } }, + }); + } else if (frame_layout.stack_adjust < + page_size * Lower.pseudo_probe_adjust_unrolled_max_insts) + { + self.mir_instructions.set(backpatch_stack_alloc, .{ + .tag = 
.pseudo, + .ops = .pseudo_probe_adjust_unrolled_ri_s, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_adjust, + } }, + }); + } else { + self.mir_instructions.set(backpatch_stack_alloc, .{ + .tag = .pseudo, + .ops = .pseudo_probe_adjust_setup_rri_s, + .data = .{ .rri = .{ + .r1 = .rsp, + .r2 = .rax, + .i = frame_layout.stack_adjust, + } }, + }); + self.mir_instructions.set(backpatch_stack_alloc_extra, .{ + .tag = .pseudo, + .ops = .pseudo_probe_adjust_loop_rr, + .data = .{ .rr = .{ + .r1 = .rsp, + .r2 = .rax, + } }, + }); + } } if (need_frame_align or need_stack_adjust) { self.mir_instructions.set(backpatch_stack_dealloc, .{ diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index f6bce992e6..65d2b64398 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -9,19 +9,33 @@ result_insts_len: u8 = undefined, result_relocs_len: u8 = undefined, result_insts: [ std.mem.max(usize, &.{ + 1, // non-pseudo instructions 2, // cmovcc: cmovcc \ cmovcc 3, // setcc: setcc \ setcc \ logicop 2, // jcc: jcc \ jcc + pseudo_probe_align_insts, + pseudo_probe_adjust_unrolled_max_insts, + pseudo_probe_adjust_setup_insts, + pseudo_probe_adjust_loop_insts, abi.Win64.callee_preserved_regs.len, // push_regs/pop_regs abi.SysV.callee_preserved_regs.len, // push_regs/pop_regs }) ]Instruction = undefined, result_relocs: [ std.mem.max(usize, &.{ + 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea 2, // jcc: jcc \ jcc + 2, // test \ jcc \ probe \ sub \ jmp + 1, // probe \ sub \ jcc }) ]Reloc = undefined, +pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp +pub const pseudo_probe_adjust_unrolled_max_insts = + pseudo_probe_adjust_setup_insts + pseudo_probe_adjust_loop_insts; +pub const pseudo_probe_adjust_setup_insts = 2; // mov \ sub +pub const pseudo_probe_adjust_loop_insts = 3; // probe \ sub \ jcc + pub const Error = error{ OutOfMemory, LowerFail, @@ -62,6 +76,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) 
Error!struct { else => try lower.generic(inst), .pseudo => switch (inst.ops) { .pseudo_cmov_z_and_np_rr => { + assert(inst.data.rr.fixes == ._); try lower.emit(.none, .cmovnz, &.{ .{ .reg = inst.data.rr.r2 }, .{ .reg = inst.data.rr.r1 }, @@ -72,6 +87,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_cmov_nz_or_p_rr => { + assert(inst.data.rr.fixes == ._); try lower.emit(.none, .cmovnz, &.{ .{ .reg = inst.data.rr.r1 }, .{ .reg = inst.data.rr.r2 }, @@ -84,6 +100,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_cmov_nz_or_p_rm_sib, .pseudo_cmov_nz_or_p_rm_rip, => { + assert(inst.data.rx.fixes == ._); try lower.emit(.none, .cmovnz, &.{ .{ .reg = inst.data.rx.r1 }, .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, @@ -94,6 +111,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_set_z_and_np_r => { + assert(inst.data.rr.fixes == ._); try lower.emit(.none, .setz, &.{ .{ .reg = inst.data.rr.r1 }, }); @@ -108,6 +126,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_set_z_and_np_m_sib, .pseudo_set_z_and_np_m_rip, => { + assert(inst.data.rx.fixes == ._); try lower.emit(.none, .setz, &.{ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, }); @@ -120,6 +139,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_set_nz_or_p_r => { + assert(inst.data.rr.fixes == ._); try lower.emit(.none, .setnz, &.{ .{ .reg = inst.data.rr.r1 }, }); @@ -134,6 +154,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_set_nz_or_p_m_sib, .pseudo_set_nz_or_p_m_rip, => { + assert(inst.data.rx.fixes == ._); try lower.emit(.none, .setnz, &.{ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, }); @@ -146,6 +167,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_j_z_and_np_inst => { + assert(inst.data.inst.fixes == ._); try lower.emit(.none, .jnz, &.{ .{ 
.imm = lower.reloc(.{ .inst = index + 1 }) }, }); @@ -154,6 +176,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_j_nz_or_p_inst => { + assert(inst.data.inst.fixes == ._); try lower.emit(.none, .jnz, &.{ .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, }); @@ -162,6 +185,78 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, + .pseudo_probe_align_ri_s => { + try lower.emit(.none, .@"test", &.{ + .{ .reg = inst.data.ri.r1 }, + .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) }, + }); + try lower.emit(.none, .jz, &.{ + .{ .imm = lower.reloc(.{ .inst = index + 1 }) }, + }); + try lower.emit(.none, .lea, &.{ + .{ .reg = inst.data.ri.r1 }, + .{ .mem = Memory.sib(.qword, .{ + .base = .{ .reg = inst.data.ri.r1 }, + .disp = -page_size, + }) }, + }); + try lower.emit(.none, .@"test", &.{ + .{ .mem = Memory.sib(.dword, .{ + .base = .{ .reg = inst.data.ri.r1 }, + }) }, + .{ .reg = inst.data.ri.r1.to32() }, + }); + try lower.emit(.none, .jmp, &.{ + .{ .imm = lower.reloc(.{ .inst = index }) }, + }); + assert(lower.result_insts_len == pseudo_probe_align_insts); + }, + .pseudo_probe_adjust_unrolled_ri_s => { + var offset = page_size; + while (offset < @bitCast(i32, inst.data.ri.i)) : (offset += page_size) { + try lower.emit(.none, .@"test", &.{ + .{ .mem = Memory.sib(.dword, .{ + .base = .{ .reg = inst.data.ri.r1 }, + .disp = -offset, + }) }, + .{ .reg = inst.data.ri.r1.to32() }, + }); + } + try lower.emit(.none, .sub, &.{ + .{ .reg = inst.data.ri.r1 }, + .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) }, + }); + assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts); + }, + .pseudo_probe_adjust_setup_rri_s => { + try lower.emit(.none, .mov, &.{ + .{ .reg = inst.data.rri.r2.to32() }, + .{ .imm = Immediate.s(@bitCast(i32, inst.data.rri.i)) }, + }); + try lower.emit(.none, .sub, &.{ + .{ .reg = inst.data.rri.r1 }, + .{ .reg = inst.data.rri.r2 }, + }); + 
assert(lower.result_insts_len == pseudo_probe_adjust_setup_insts); + }, + .pseudo_probe_adjust_loop_rr => { + try lower.emit(.none, .@"test", &.{ + .{ .mem = Memory.sib(.dword, .{ + .base = .{ .reg = inst.data.rr.r1 }, + .scale_index = .{ .scale = 1, .index = inst.data.rr.r2 }, + .disp = -page_size, + }) }, + .{ .reg = inst.data.rr.r1.to32() }, + }); + try lower.emit(.none, .sub, &.{ + .{ .reg = inst.data.rr.r2 }, + .{ .imm = Immediate.s(page_size) }, + }); + try lower.emit(.none, .jae, &.{ + .{ .imm = lower.reloc(.{ .inst = index }) }, + }); + assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts); + }, .pseudo_push_reg_list => try lower.pushPopRegList(.push, inst), .pseudo_pop_reg_list => try lower.pushPopRegList(.pop, inst), @@ -440,6 +535,8 @@ fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Er }}); } +const page_size: i32 = 1 << 12; + const abi = @import("abi.zig"); const assert = std.debug.assert; const bits = @import("bits.zig"); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 919974e7d2..f26bf97e82 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -740,6 +740,18 @@ pub const Inst = struct { /// Uses `inst` payload. pseudo_j_nz_or_p_inst, + /// Probe alignment + /// Uses `ri` payload + pseudo_probe_align_ri_s, + /// Probe adjust unrolled + /// Uses `ri` payload + pseudo_probe_adjust_unrolled_ri_s, + /// Probe adjust setup + /// Uses `rri` payload + pseudo_probe_adjust_setup_rri_s, + /// Probe adjust loop + /// Uses `rr` payload + pseudo_probe_adjust_loop_rr, /// Push registers /// Uses `reg_list` payload. pseudo_push_reg_list, |
