| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-09-03 20:36:01 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2022-09-07 22:42:57 +0200 |
| commit | e0167ae0e3f0e6c6a667b372b21c322d47ccd92d (patch) | |
| tree | 27b8d51339716708ae8398f7683aa7f57e48e245 /src | |
| parent | 619d82234ecbb8a130e827e9de6f545288649a58 (diff) | |
| download | zig-e0167ae0e3f0e6c6a667b372b21c322d47ccd92d.tar.gz zig-e0167ae0e3f0e6c6a667b372b21c322d47ccd92d.zip | |
x86_64: allow for any index register in complex SIB encodings
This relieves register pressure and reduces generated code size
(since we can now use the same index register for both the `mov_scale_src`
and `mov_scale_dst` MIR instructions).
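For a sense of the saving, here is a hand-written sketch based on the loop comments in the CodeGen.zig diff below (`index_reg`, `src_addr`, `dst_addr`, and `tmp` are the names used in the diff; the concrete registers are whatever the register manager allocates):

    ; before: rcx indexes the load, rax indexes the store
    mov  tmp, [src_addr + rcx]
    mov  [dst_addr + rax], tmp
    add  rcx, 1
    add  rax, 1

    ; after: a single allocated index register drives both sides
    mov  tmp, [src_addr + index_reg]
    mov  [dst_addr + index_reg], tmp
    add  index_reg, 1

One register zeroing and one `add` per iteration disappear, and `rax`/`rcx` are no longer pinned for the duration of the copy.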
Fix lowering of ModRM + SIB encodings where the index register is extended:
previously, we would simply ignore this and generate incorrect encodings
(the REX prefix's X bit was never set for the SIB index register).
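To make the second point concrete, here is a minimal, self-contained sketch of the encoding rule involved (standalone Zig; `rexPrefix` is a hypothetical helper, not the backend's encoder API): whenever an instruction uses a SIB byte, the REX prefix's X bit must carry the high bit of the index register, otherwise an extended index such as r9 is silently encoded as rcx.

```zig
const std = @import("std");

/// Hypothetical helper: compose a REX prefix byte (0b0100WRXB).
fn rexPrefix(w: bool, r: bool, x: bool, b: bool) u8 {
    var rex: u8 = 0x40;
    if (w) rex |= 0b1000; // REX.W: 64-bit operand size
    if (r) rex |= 0b0100; // REX.R: extends ModRM.reg
    if (x) rex |= 0b0010; // REX.X: extends SIB.index (the bit this fix supplies)
    if (b) rex |= 0b0001; // REX.B: extends ModRM.rm / SIB.base
    return rex;
}

test "extended index register requires REX.X" {
    // mov rax, [rbx + r9]: base rbx is not extended, index r9 is,
    // so the prefix must be REX.W | REX.X = 0x4A.
    try std.testing.expectEqual(@as(u8, 0x4A), rexPrefix(true, false, true, false));
}
```

The Emit.zig changes below apply exactly this rule by passing `si.index.isExtended()` into each `encoder.rex(...)`/`vex.rex(...)` call.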
Diffstat (limited to 'src')
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 104 |
| -rw-r--r-- | src/arch/x86_64/Emit.zig | 69 |
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 87 |
3 files changed, 154 insertions, 106 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 0269a44be8..b03acfb848 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5932,7 +5932,6 @@ const InlineMemcpyOpts = struct { dest_stack_base: ?Register = null, }; -/// Spills .rax and .rcx. fn genInlineMemcpy( self: *Self, dst_ptr: MCValue, @@ -5940,19 +5939,6 @@ fn genInlineMemcpy( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO: Preserve contents of .rax and .rcx if not free and locked, and then restore - // How can we do this without context if the value inside .rax or .rcx we preserve contains - // value needed to perform the memcpy in the first place? - // I think we should have an accumulator-based context that we pass with each subsequent helper - // call until we resolve the entire instruction. - try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rcx, null); - - const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rcx }); - defer for (reg_locks) |lock| { - self.register_manager.unlockReg(lock); - }; - const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg| self.register_manager.lockReg(reg) else @@ -5965,7 +5951,13 @@ fn genInlineMemcpy( null; defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); - const dst_addr_reg = try self.register_manager.allocReg(null, gp); + const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); + const dst_addr_reg = regs[0]; + const src_addr_reg = regs[1]; + const index_reg = regs[2].to64(); + const count_reg = regs[3].to64(); + const tmp_reg = regs[4].to8(); + switch (dst_ptr) { .memory, .got_load, @@ -5998,10 +5990,7 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - const dst_addr_reg_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg); - defer self.register_manager.unlockReg(dst_addr_reg_lock); - const src_addr_reg = try self.register_manager.allocReg(null, gp); switch (src_ptr) { .memory, .got_load, @@ -6034,26 +6023,13 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr}); }, } - const src_addr_reg_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg); - defer self.register_manager.unlockReg(src_addr_reg_lock); - - const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); - const count_reg = regs[0].to64(); - const tmp_reg = regs[1].to8(); try self.genSetReg(Type.usize, count_reg, len); - // mov rcx, 0 - _ = try self.addInst(.{ - .tag = .mov, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), - .data = .{ .imm = 0 }, - }); - - // mov rax, 0 + // mov index_reg, 0 _ = try self.addInst(.{ .tag = .mov, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 0 }, }); @@ -6075,37 +6051,30 @@ fn genInlineMemcpy( } }, }); - // mov tmp, [addr + rcx] + // mov tmp, [addr + index_reg] _ = try self.addInst(.{ .tag = .mov_scale_src, .ops = Mir.Inst.Ops.encode(.{ .reg1 = tmp_reg.to8(), .reg2 = src_addr_reg, }), - .data = .{ .imm = 0 }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) }, }); - // mov [stack_offset + rax], tmp + // mov [stack_offset + index_reg], tmp _ = try self.addInst(.{ .tag = .mov_scale_dst, .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_addr_reg, .reg2 = tmp_reg.to8(), }), - .data = .{ .imm = 0 }, + .data = .{ .payload = try 
self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) }, }); - // add rcx, 1 + // add index_reg, 1 _ = try self.addInst(.{ .tag = .add, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), - .data = .{ .imm = 1 }, - }); - - // add rax, 1 - _ = try self.addInst(.{ - .tag = .add, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 1 }, }); @@ -6127,7 +6096,6 @@ fn genInlineMemcpy( try self.performReloc(loop_reloc); } -/// Spills .rax register. fn genInlineMemset( self: *Self, dst_ptr: MCValue, @@ -6135,12 +6103,22 @@ fn genInlineMemset( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO preserve contents of .rax and then restore - try self.register_manager.getReg(.rax, null); - const rax_lock = self.register_manager.lockRegAssumeUnused(.rax); - defer self.register_manager.unlockReg(rax_lock); + const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (ssbase_lock) |reg| self.register_manager.unlockReg(reg); + + const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); + + const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); + const addr_reg = regs[0]; + const index_reg = regs[1].to64(); - const addr_reg = try self.register_manager.allocReg(null, gp); switch (dst_ptr) { .memory, .got_load, @@ -6173,17 +6151,15 @@ fn genInlineMemset( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_reg_lock); - try self.genSetReg(Type.usize, .rax, len); - try self.genBinOpMir(.sub, Type.usize, .{ .register = .rax }, .{ .immediate = 1 }); + try self.genSetReg(Type.usize, index_reg, len); + try self.genBinOpMir(.sub, Type.usize, .{ .register = index_reg }, .{ .immediate = 1 }); // loop: - // cmp rax, -1 + // cmp index_reg, -1 const loop_start = try self.addInst(.{ .tag = .cmp, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = @bitCast(u32, @as(i32, -1)) }, }); @@ -6202,24 +6178,20 @@ fn genInlineMemset( if (x > math.maxInt(i32)) { return self.fail("TODO inline memset for value immediate larger than 32bits", .{}); } - // mov byte ptr [rbp + rax + stack_offset], imm - const payload = try self.addExtra(Mir.ImmPair{ - .dest_off = 0, - .operand = @truncate(u32, x), - }); + // mov byte ptr [rbp + index_reg + stack_offset], imm _ = try self.addInst(.{ .tag = .mov_mem_index_imm, .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg }), - .data = .{ .payload = payload }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDispImm.encode(index_reg, 0, @truncate(u32, x))) }, }); }, else => return self.fail("TODO inline memset for value of type {}", .{value}), } - // sub rax, 1 + // sub index_reg, 1 _ = try self.addInst(.{ .tag = .sub, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 1 }, }); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 66e603aab0..61ae772794 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -615,14 +615,15 @@ inline fn immOpSize(u_imm: u32) u6 { fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = 
emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; - const imm = emit.mir.instructions.items(.data)[inst].imm; - // OP reg1, [reg2 + scale*rcx + imm32] + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); + // OP reg1, [reg2 + scale*index + imm32] const scale_index = ScaleIndex{ .scale = scale, - .index = .rcx, + .index = index_reg_disp.index, }; return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = ops.reg2, .scale_index = scale_index, }), emit.code); @@ -631,22 +632,16 @@ fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; - const imm = emit.mir.instructions.items(.data)[inst].imm; + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); const scale_index = ScaleIndex{ .scale = scale, - .index = .rax, + .index = index_reg_disp.index, }; - if (ops.reg2 == .none) { - // OP qword ptr [reg1 + scale*rax + 0], imm32 - return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{ - .disp = 0, - .base = ops.reg1, - .scale_index = scale_index, - }), imm, emit.code); - } - // OP [reg1 + scale*rax + imm32], reg2 + assert(ops.reg2 != .none); + // OP [reg1 + scale*index + imm32], reg2 return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = ops.reg1, .scale_index = scale_index, }), ops.reg2, emit.code); @@ -656,24 +651,24 @@ fn mirArithScaleImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; const payload = emit.mir.instructions.items(.data)[inst].payload; - const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode(); const scale_index = ScaleIndex{ .scale = scale, - .index = .rax, + .index = index_reg_disp_imm.index, }; - // OP qword ptr [reg1 + scale*rax + imm32], imm32 + // OP qword ptr [reg1 + scale*index + imm32], imm32 return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{ - .disp = imm_pair.dest_off, + .disp = index_reg_disp_imm.disp, .base = ops.reg1, .scale_index = scale_index, - }), imm_pair.operand, emit.code); + }), index_reg_disp_imm.imm, emit.code); } fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); assert(ops.reg2 == .none); const payload = emit.mir.instructions.items(.data)[inst].payload; - const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode(); const ptr_size: Memory.PtrSize = switch (ops.flags) { 0b00 => .byte_ptr, 0b01 => .word_ptr, @@ -682,14 +677,14 @@ fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!v }; const scale_index = ScaleIndex{ .scale = 0, - .index = .rax, + .index = index_reg_disp_imm.index, }; - // OP ptr [reg1 + rax*1 + imm32], imm32 + // OP ptr [reg1 + index + imm32], imm32 return lowerToMiEnc(tag, RegisterOrMemory.mem(ptr_size, .{ - 
.disp = imm_pair.dest_off, + .disp = index_reg_disp_imm.disp, .base = ops.reg1, .scale_index = scale_index, - }), imm_pair.operand, emit.code); + }), index_reg_disp_imm.imm, emit.code); } fn mirMovSignExtend(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { @@ -957,18 +952,19 @@ fn mirLea(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { mem.writeIntLittle(i32, emit.code.items[end_offset - 4 ..][0..4], disp); }, 0b10 => { - // lea reg, [rbp + rcx + imm32] - const imm = emit.mir.instructions.items(.data)[inst].imm; + // lea reg, [rbp + index + imm32] + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); const src_reg: ?Register = if (ops.reg2 != .none) ops.reg2 else null; const scale_index = ScaleIndex{ .scale = 0, - .index = .rcx, + .index = index_reg_disp.index, }; return lowerToRmEnc( .lea, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = src_reg, .scale_index = scale_index, }), @@ -2255,6 +2251,7 @@ fn lowerToMxEnc(tag: Tag, reg_or_mem: RegisterOrMemory, enc: Encoding, code: *st encoder.rex(.{ .w = wide, .b = base.isExtended(), + .x = if (mem_op.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2360,10 +2357,12 @@ fn lowerToMiXEnc( encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr, .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr, + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2415,11 +2414,13 @@ fn lowerToRmEnc( .w = setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = setRexWRegister(reg), .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2460,11 +2461,13 @@ fn lowerToMrEnc( .w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg), .r = reg.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2504,11 +2507,13 @@ fn lowerToRmiEnc( .w = setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = setRexWRegister(reg), .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2545,10 +2550,12 @@ fn lowerToVmEnc( vex.rex(.{ .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { vex.rex(.{ .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } encoder.vex(enc.prefix); @@ -2585,10 +2592,12 @@ fn lowerToMvEnc( vex.rex(.{ .r = reg.isExtended(), .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { vex.rex(.{ .r = reg.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } encoder.vex(enc.prefix); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 
ca19847042..182f3267a6 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -44,25 +44,28 @@ pub const Inst = struct { /// 0b01 word ptr [reg1 + imm32], imm16 /// 0b10 dword ptr [reg1 + imm32], imm32 /// 0b11 qword ptr [reg1 + imm32], imm32 (sign-extended to imm64) + /// Notes: + /// * Uses `ImmPair` as payload adc_mem_imm, - /// form: reg1, [reg2 + scale*rcx + imm32] + /// form: reg1, [reg2 + scale*index + imm32] /// ops flags scale /// 0b00 1 /// 0b01 2 /// 0b10 4 /// 0b11 8 + /// Notes: + /// * Uses `IndexRegisterDisp` as payload adc_scale_src, - /// form: [reg1 + scale*rax + imm32], reg2 - /// form: [reg1 + scale*rax + 0], imm32 + /// form: [reg1 + scale*index + imm32], reg2 /// ops flags scale /// 0b00 1 /// 0b01 2 /// 0b10 4 /// 0b11 8 /// Notes: - /// * If reg2 is `none` then it means Data field `imm` is used as the immediate. + /// * Uses `IndexRegisterDisp` payload. adc_scale_dst, /// form: [reg1 + scale*rax + imm32], imm32 @@ -72,14 +75,16 @@ pub const Inst = struct { /// 0b10 4 /// 0b11 8 /// Notes: - /// * Data field `payload` points at `ImmPair`. + /// * Uses `IndexRegisterDispImm` payload. adc_scale_imm, /// ops flags: form: - /// 0b00 byte ptr [reg1 + rax + imm32], imm8 - /// 0b01 word ptr [reg1 + rax + imm32], imm16 - /// 0b10 dword ptr [reg1 + rax + imm32], imm32 - /// 0b11 qword ptr [reg1 + rax + imm32], imm32 (sign-extended to imm64) + /// 0b00 byte ptr [reg1 + index + imm32], imm8 + /// 0b01 word ptr [reg1 + index + imm32], imm16 + /// 0b10 dword ptr [reg1 + index + imm32], imm32 + /// 0b11 qword ptr [reg1 + index + imm32], imm32 (sign-extended to imm64) + /// Notes: + /// * Uses `IndexRegisterDispImm` payload. adc_mem_index_imm, // The following instructions all have the same encoding as `adc`. @@ -174,7 +179,9 @@ pub const Inst = struct { /// 0b00 reg1, [reg2 + imm32] /// 0b00 reg1, [ds:imm32] /// 0b01 reg1, [rip + imm32] - /// 0b10 reg1, [reg2 + rcx + imm32] + /// 0b10 reg1, [reg2 + index + imm32] + /// Notes: + /// * 0b10 uses `IndexRegisterDisp` payload lea, /// ops flags: form: @@ -461,6 +468,66 @@ pub const Inst = struct { } }; +pub const IndexRegisterDisp = struct { + /// Index register to use with SIB-based encoding + index: u32, + + /// Displacement value + disp: u32, + + pub fn encode(index: Register, disp: u32) IndexRegisterDisp { + return .{ + .index = @enumToInt(index), + .disp = disp, + }; + } + + pub fn decode(this: IndexRegisterDisp) struct { + index: Register, + disp: u32, + } { + return .{ + .index = @intToEnum(Register, this.index), + .disp = this.disp, + }; + } +}; + +/// TODO: would it be worth making `IndexRegisterDisp` and `IndexRegisterDispImm` a variable length list +/// instead of having two structs, one a superset of the other one? +pub const IndexRegisterDispImm = struct { + /// Index register to use with SIB-based encoding + index: u32, + + /// Displacement value + disp: u32, + + /// Immediate + imm: u32, + + pub fn encode(index: Register, disp: u32, imm: u32) IndexRegisterDispImm { + return .{ + .index = @enumToInt(index), + .disp = disp, + .imm = imm, + }; + } + + pub fn decode(this: IndexRegisterDispImm) struct { + index: Register, + disp: u32, + imm: u32, + } { + return .{ + .index = @intToEnum(Register, this.index), + .disp = this.disp, + .imm = this.imm, + }; + } +}; + +/// Used in conjunction with `SaveRegisterList` payload to transfer a list of used registers +/// in a compact manner. pub const RegisterList = struct { bitset: BitSet = BitSet.initEmpty(), |
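For readers unfamiliar with the Mir extra-data scheme, here is a self-contained sketch of the round-trip performed by the new payload types added in Mir.zig above (the `Register` enum below is a tiny stand-in so the example compiles on its own; the real one lives elsewhere in the backend):

```zig
const std = @import("std");

// Stand-in for the backend's Register enum, only so this example is self-contained.
const Register = enum(u8) { rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11 };

const IndexRegisterDisp = struct {
    index: u32,
    disp: u32,

    fn encode(index: Register, disp: u32) IndexRegisterDisp {
        return .{ .index = @enumToInt(index), .disp = disp };
    }

    fn decode(this: IndexRegisterDisp) struct { index: Register, disp: u32 } {
        return .{ .index = @intToEnum(Register, this.index), .disp = this.disp };
    }
};

test "index register and displacement survive the round-trip through u32 fields" {
    const decoded = IndexRegisterDisp.encode(.r9, 16).decode();
    try std.testing.expectEqual(Register.r9, decoded.index);
    try std.testing.expectEqual(@as(u32, 16), decoded.disp);
}
```

Packing the index register into the payload alongside the displacement (and, for `IndexRegisterDispImm`, the immediate) is what lets `mov_scale_src`, `mov_scale_dst`, and the memory-index forms name an arbitrary index register instead of assuming rcx/rax.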
