diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-03-30 11:22:35 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-03-30 11:22:35 +0200 |
| commit | b153e156b1d431995e7f89150c93ec05aa3966f8 (patch) | |
| tree | 9311b768bbf2547767df8a3dc8c58ececdac190f /src | |
| parent | c21f046a8b019c42aa0dbb0fb9c592b590edf977 (diff) | |
| parent | 376d0878ec2e366633e2dbc7c91ab7ae2a6ae5b7 (diff) | |
| download | zig-b153e156b1d431995e7f89150c93ec05aa3966f8.tar.gz zig-b153e156b1d431995e7f89150c93ec05aa3966f8.zip | |
Merge pull request #11342 from ziglang/stage2-x64-mul-div
x64: use all available registers (caller and callee), and fix how we multiply (fixes @mulWithOverflow for byte-size operands)
Diffstat (limited to 'src')
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 316 | ||||
| -rw-r--r-- | src/arch/x86_64/Emit.zig | 7 | ||||
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 1 | ||||
| -rw-r--r-- | src/arch/x86_64/abi.zig | 11 |
4 files changed, 195 insertions, 140 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 5d8864f1a2..9ecd2d4551 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -31,6 +31,8 @@ const bits = @import("bits.zig"); const abi = @import("abi.zig"); const Register = bits.Register; const callee_preserved_regs = abi.callee_preserved_regs; +const caller_preserved_regs = abi.caller_preserved_regs; +const allocatable_registers = abi.allocatable_registers; const c_abi_int_param_regs = abi.c_abi_int_param_regs; const c_abi_int_return_regs = abi.c_abi_int_return_regs; @@ -40,7 +42,7 @@ const InnerError = error{ OutOfRegisters, }; -const RegisterManager = RegisterManagerFn(Self, Register, &callee_preserved_regs); +const RegisterManager = RegisterManagerFn(Self, Register, &allocatable_registers); gpa: Allocator, air: Air, @@ -56,7 +58,6 @@ arg_index: u32, src_loc: Module.SrcLoc, stack_align: u32, -ret_backpatches: std.ArrayListUnmanaged(Mir.Inst.Index) = .{}, compare_flags_inst: ?Air.Inst.Index = null, /// MIR Instructions @@ -351,7 +352,6 @@ pub fn generate( std.AutoHashMap(Mir.Inst.Index, Air.Inst.Index).init(bin_file.allocator) else {}, }; - defer function.ret_backpatches.deinit(bin_file.allocator); defer function.stack.deinit(bin_file.allocator); defer function.blocks.deinit(bin_file.allocator); defer function.exitlude_jump_relocs.deinit(bin_file.allocator); @@ -480,6 +480,21 @@ fn gen(self: *Self) InnerError!void { .data = undefined, }); + if (self.ret_mcv == .stack_offset) { + // The address where to store the return value for the caller is in `.rdi` + // register which the callee is free to clobber. Therefore, we purposely + // spill it to stack immediately. + const ptr_ty = Type.usize; + const abi_size = @intCast(u32, ptr_ty.abiSize(self.target.*)); + const abi_align = ptr_ty.abiAlignment(self.target.*); + const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + abi_size, abi_align); + self.next_stack_offset = stack_offset; + self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset); + try self.genSetStack(ptr_ty, @intCast(i32, stack_offset), MCValue{ .register = .rdi }, .{}); + self.ret_mcv = MCValue{ .stack_offset = @intCast(i32, stack_offset) }; + log.debug("gen: spilling .rdi to stack at offset {}", .{stack_offset}); + } + _ = try self.addInst(.{ .tag = .dbg_prologue_end, .ops = undefined, @@ -517,23 +532,6 @@ fn gen(self: *Self) InnerError!void { var disp = data.disp + 8; inline for (callee_preserved_regs) |reg, i| { if (self.register_manager.isRegAllocated(reg)) { - if (reg.to64() == .rdi) { - for (self.ret_backpatches.items) |inst| { - log.debug(".rdi was spilled, backpatching with mov from stack at offset {}", .{ - -@intCast(i32, disp), - }); - const ops = Mir.Ops.decode(self.mir_instructions.items(.ops)[inst]); - self.mir_instructions.set(inst, Mir.Inst{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = ops.reg1, - .reg2 = .rbp, - .flags = 0b01, - }).encode(), - .data = .{ .imm = @bitCast(u32, -@intCast(i32, disp)) }, - }); - } - } data.regs |= 1 << @intCast(u5, i); self.max_end_stack += 8; disp += 8; @@ -912,8 +910,7 @@ fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u // TODO find a free slot instead of always appending const offset = mem.alignForwardGeneric(u32, self.next_stack_offset + abi_size, abi_align); self.next_stack_offset = offset; - if (self.next_stack_offset > self.max_end_stack) - self.max_end_stack = self.next_stack_offset; + self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset); try self.stack.putNoClobber(self.gpa, offset, .{ .inst = inst, .size = abi_size, @@ -1256,7 +1253,7 @@ fn genPtrBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_r offset_mcv.freezeIfRegister(&self.register_manager); defer offset_mcv.unfreezeIfRegister(&self.register_manager); - try self.genIMulOpMir(offset_ty, offset_mcv, .{ .immediate = elem_size }); + try self.genIntMulComplexOpMir(offset_ty, offset_mcv, .{ .immediate = elem_size }); const tag = self.air.instructions.items(.tag)[inst]; switch (tag) { @@ -1398,10 +1395,29 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { fn airMul(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const result: MCValue = if (self.liveness.isUnused(inst)) - .dead - else - try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs); + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const ty = self.air.typeOfIndex(inst); + + if (ty.zigTypeTag() != .Int) { + return self.fail("TODO implement 'mul' for operands of dst type {}", .{ty.zigTypeTag()}); + } + + // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. + try self.register_manager.getReg(.rax, inst); + try self.register_manager.getReg(.rdx, null); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); + + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + + const signedness = ty.intInfo(self.target.*).signedness; + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .imul, + .unsigned => .mul, + }, ty, signedness, lhs, rhs); + break :result MCValue{ .register = .rax }; + }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -1476,23 +1492,33 @@ fn airSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; + const result = if (self.liveness.isUnused(inst)) .dead else result: { + const ty = self.air.typeOf(bin_op.lhs); + const signedness: std.builtin.Signedness = blk: { + if (ty.zigTypeTag() != .Int) { + return self.fail("TODO implement airMulWithOverflow for type {}", .{ty.fmtDebug()}); + } + break :blk ty.intInfo(self.target.*).signedness; + }; - if (self.liveness.isUnused(inst)) { - return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none }); - } + // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. + try self.register_manager.getReg(.rax, inst); + try self.register_manager.getReg(.rdx, null); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); - const ty = self.air.typeOf(bin_op.lhs); - const signedness: std.builtin.Signedness = blk: { - if (ty.zigTypeTag() != .Int) { - return self.fail("TODO implement airMulWithOverflow for type {}", .{ty.fmtDebug()}); - } - break :blk ty.intInfo(self.target.*).signedness; - }; + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); - const partial = try self.genBinMathOp(inst, bin_op.lhs, bin_op.rhs); - const result: MCValue = switch (signedness) { - .signed => .{ .register_overflow_signed = partial.register }, - .unsigned => .{ .register_overflow_unsigned = partial.register }, + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .imul, + .unsigned => .mul, + }, ty, signedness, lhs, rhs); + + switch (signedness) { + .signed => break :result MCValue{ .register_overflow_signed = .rax }, + .unsigned => break :result MCValue{ .register_overflow_unsigned = .rax }, + } }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -1503,11 +1529,12 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { return self.fail("TODO implement airShlWithOverflow for {}", .{self.target.cpu.arch}); } -/// Generates signed or unsigned integer division. -/// Requires use of .rax and .rdx registers. Spills them if necessary. +/// Generates signed or unsigned integer multiplication/division. +/// Clobbers .rax and .rdx registers. /// Quotient is saved in .rax and remainder in .rdx. -fn genIntDivOpMir( +fn genIntMulDivOpMir( self: *Self, + tag: Mir.Inst.Tag, ty: Type, signedness: std.builtin.Signedness, lhs: MCValue, @@ -1515,25 +1542,10 @@ fn genIntDivOpMir( ) !void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); if (abi_size > 8) { - return self.fail("TODO implement genIntDivOpMir for ABI size larger than 8", .{}); + return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{}); } - try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rdx, null); - self.register_manager.freezeRegs(&.{ .rax, .rdx }); - defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); - - const dividend = switch (lhs) { - .register => lhs, - else => blk: { - const reg = try self.copyToTmpRegister(ty, lhs); - break :blk MCValue{ .register = reg }; - }, - }; - try self.genSetReg(ty, .rax, dividend); - - self.register_manager.freezeRegs(&.{dividend.register}); - defer self.register_manager.unfreezeRegs(&.{dividend.register}); + try self.genSetReg(ty, .rax, lhs); switch (signedness) { .signed => { @@ -1557,22 +1569,19 @@ fn genIntDivOpMir( }, } - const divisor = switch (rhs) { + const factor = switch (rhs) { .register => rhs, + .stack_offset => rhs, else => blk: { const reg = try self.copyToTmpRegister(ty, rhs); break :blk MCValue{ .register = reg }; }, }; - const op_tag: Mir.Inst.Tag = switch (signedness) { - .signed => .idiv, - .unsigned => .div, - }; - switch (divisor) { + switch (factor) { .register => |reg| { _ = try self.addInst(.{ - .tag = op_tag, + .tag = tag, .ops = (Mir.Ops{ .reg1 = reg, }).encode(), @@ -1581,7 +1590,7 @@ fn genIntDivOpMir( }, .stack_offset => |off| { _ = try self.addInst(.{ - .tag = op_tag, + .tag = tag, .ops = (Mir.Ops{ .reg2 = .rbp, .flags = switch (abi_size) { @@ -1599,6 +1608,7 @@ fn genIntDivOpMir( } } +/// Clobbers .rax and .rdx registers. fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue { const signedness = ty.intInfo(self.target.*).signedness; const dividend = switch (lhs) { @@ -1614,7 +1624,10 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa self.register_manager.freezeRegs(&.{divisor}); defer self.register_manager.unfreezeRegs(&.{ dividend, divisor }); - try self.genIntDivOpMir(Type.isize, signedness, .{ .register = dividend }, .{ .register = divisor }); + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .idiv, + .unsigned => .div, + }, Type.isize, signedness, .{ .register = dividend }, .{ .register = divisor }); _ = try self.addInst(.{ .tag = .xor, @@ -1666,22 +1679,53 @@ fn airDiv(self: *Self, inst: Air.Inst.Index) !void { return self.fail("TODO implement {}", .{tag}); } + const signedness = ty.intInfo(self.target.*).signedness; + // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. - try self.register_manager.getReg(.rax, null); + const track_rax: ?Air.Inst.Index = blk: { + if (signedness == .unsigned) break :blk inst; + switch (tag) { + .div_exact, .div_trunc => break :blk inst, + else => break :blk null, + } + }; + try self.register_manager.getReg(.rax, track_rax); try self.register_manager.getReg(.rdx, null); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); + lhs.freezeIfRegister(&self.register_manager); + defer lhs.unfreezeIfRegister(&self.register_manager); + + const rhs = blk: { + const rhs = try self.resolveInst(bin_op.rhs); + if (signedness == .signed) { + switch (tag) { + .div_floor => { + rhs.freezeIfRegister(&self.register_manager); + defer rhs.unfreezeIfRegister(&self.register_manager); + break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs); + }, + else => {}, + } + } + break :blk rhs; + }; + rhs.freezeIfRegister(&self.register_manager); + defer rhs.unfreezeIfRegister(&self.register_manager); - const signedness = ty.intInfo(self.target.*).signedness; if (signedness == .unsigned) { - try self.genIntDivOpMir(ty, signedness, lhs, rhs); + try self.genIntMulDivOpMir(.div, ty, signedness, lhs, rhs); break :result MCValue{ .register = .rax }; } switch (tag) { .div_exact, .div_trunc => { - try self.genIntDivOpMir(ty, signedness, lhs, rhs); + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .idiv, + .unsigned => .div, + }, ty, signedness, lhs, rhs); break :result MCValue{ .register = .rax }; }, .div_floor => { @@ -1702,11 +1746,18 @@ fn airRem(self: *Self, inst: Air.Inst.Index) !void { } // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rdx, null); + try self.register_manager.getReg(.rdx, inst); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); + const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); + const signedness = ty.intInfo(self.target.*).signedness; - try self.genIntDivOpMir(ty, signedness, lhs, rhs); + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .idiv, + .unsigned => .div, + }, ty, signedness, lhs, rhs); break :result MCValue{ .register = .rdx }; }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -1719,25 +1770,33 @@ fn airMod(self: *Self, inst: Air.Inst.Index) !void { if (ty.zigTypeTag() != .Int) { return self.fail("TODO implement .mod for operands of dst type {}", .{ty.zigTypeTag()}); } + const signedness = ty.intInfo(self.target.*).signedness; + // Spill .rax and .rdx upfront to ensure we don't spill the operands too late. try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rdx, null); + try self.register_manager.getReg(.rdx, if (signedness == .unsigned) inst else null); + self.register_manager.freezeRegs(&.{ .rax, .rdx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx }); + const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const signedness = ty.intInfo(self.target.*).signedness; + switch (signedness) { .unsigned => { - try self.genIntDivOpMir(ty, signedness, lhs, rhs); + try self.genIntMulDivOpMir(switch (signedness) { + .signed => .idiv, + .unsigned => .div, + }, ty, signedness, lhs, rhs); break :result MCValue{ .register = .rdx }; }, .signed => { const div_floor = try self.genInlineIntDivFloor(ty, lhs, rhs); - try self.genIMulOpMir(ty, div_floor, rhs); + try self.genIntMulComplexOpMir(ty, div_floor, rhs); - const reg = try self.copyToTmpRegister(ty, lhs); - try self.genBinMathOpMir(.sub, ty, .{ .register = reg }, div_floor); + const result = try self.copyToRegisterWithInstTracking(inst, ty, lhs); + try self.genBinMathOpMir(.sub, ty, result, div_floor); - break :result MCValue{ .register = reg }; + break :result result; }, } }; @@ -2134,7 +2193,7 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void { fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register { const reg = try self.copyToTmpRegister(index_ty, index); - try self.genIMulOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size }); + try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size }); return reg; } @@ -3098,7 +3157,6 @@ fn genBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: .bool_or, .bit_or => try self.genBinMathOpMir(.@"or", dst_ty, dst_mcv, src_mcv), .bool_and, .bit_and => try self.genBinMathOpMir(.@"and", dst_ty, dst_mcv, src_mcv), .xor, .not => try self.genBinMathOpMir(.xor, dst_ty, dst_mcv, src_mcv), - .mul, .mulwrap, .mul_with_overflow => try self.genIMulOpMir(dst_ty, dst_mcv, src_mcv), else => unreachable, } return dst_mcv; @@ -3254,8 +3312,9 @@ fn genBinMathOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MC } } -// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. -fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { +/// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. +/// Does not support byte-size operands. +fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); switch (dst_mcv) { .none => unreachable, @@ -3301,7 +3360,7 @@ fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) ! } else { // TODO verify we don't spill and assign to the same register as dst_mcv const src_reg = try self.copyToTmpRegister(dst_ty, src_mcv); - return self.genIMulOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); + return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); } }, .stack_offset => |off| { @@ -3458,15 +3517,20 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. try self.spillCompareFlagsIfOccupied(); + for (caller_preserved_regs) |reg| { + try self.register_manager.getReg(reg, null); + } + if (info.return_value == .stack_offset) { const ret_ty = fn_ty.fnReturnType(); const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(self.target.*)); const stack_offset = @intCast(i32, try self.allocMem(inst, ret_abi_size, ret_abi_align)); + log.debug("airCall: return value on stack at offset {}", .{stack_offset}); try self.register_manager.getReg(.rdi, null); - self.register_manager.freezeRegs(&.{.rdi}); try self.genSetReg(Type.usize, .rdi, .{ .ptr_stack_offset = stack_offset }); + self.register_manager.freezeRegs(&.{.rdi}); info.return_value.stack_offset = stack_offset; } @@ -3653,15 +3717,13 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const result: MCValue = result: { switch (info.return_value) { - .register => |reg| { - if (RegisterManager.indexOfReg(&callee_preserved_regs, reg) == null) { - // Save function return value in a callee saved register - break :result try self.copyToRegisterWithInstTracking( - inst, - self.air.typeOfIndex(inst), - info.return_value, - ); - } + .register => { + // Save function return value in a new register + break :result try self.copyToRegisterWithInstTracking( + inst, + self.air.typeOfIndex(inst), + info.return_value, + ); }, else => {}, } @@ -3688,19 +3750,11 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { const ret_ty = self.fn_type.fnReturnType(); switch (self.ret_mcv) { .stack_offset => { - // TODO audit register allocation! - self.register_manager.freezeRegs(&.{ .rax, .rcx, .rdi }); - defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx, .rdi }); - const reg = try self.register_manager.allocReg(null); - const backpatch = try self.addInst(.{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = reg, - .reg2 = .rdi, - }).encode(), - .data = undefined, - }); - try self.ret_backpatches.append(self.gpa, backpatch); + self.register_manager.freezeRegs(&.{ .rax, .rcx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx }); + const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv); + self.register_manager.freezeRegs(&.{reg}); + defer self.register_manager.unfreezeRegs(&.{reg}); try self.genSetStack(ret_ty, 0, operand, .{ .source_stack_base = .rbp, .dest_stack_base = reg, @@ -3731,19 +3785,11 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { const elem_ty = ptr_ty.elemType(); switch (self.ret_mcv) { .stack_offset => { - // TODO audit register allocation! - self.register_manager.freezeRegs(&.{ .rax, .rcx, .rdi }); - defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx, .rdi }); - const reg = try self.register_manager.allocReg(null); - const backpatch = try self.addInst(.{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = reg, - .reg2 = .rdi, - }).encode(), - .data = undefined, - }); - try self.ret_backpatches.append(self.gpa, backpatch); + self.register_manager.freezeRegs(&.{ .rax, .rcx }); + defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx }); + const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv); + self.register_manager.freezeRegs(&.{reg}); + defer self.register_manager.unfreezeRegs(&.{reg}); try self.genInlineMemcpy(.{ .stack_offset = 0 }, ptr, .{ .immediate = elem_ty.abiSize(self.target.*) }, .{ .source_stack_base = .rbp, .dest_stack_base = reg, @@ -5033,6 +5079,7 @@ const InlineMemcpyOpts = struct { dest_stack_base: ?Register = null, }; +/// Spills .rax and .rcx. fn genInlineMemcpy( self: *Self, dst_ptr: MCValue, @@ -5040,13 +5087,7 @@ fn genInlineMemcpy( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO this is wrong. We should check first if any of the operands is in `.rax` or `.rcx` before - // spilling. Consolidate with other TODOs regarding register allocation mechanics. - try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rcx, null); - self.register_manager.freezeRegs(&.{ .rax, .rcx }); - defer self.register_manager.unfreezeRegs(&.{ .rax, .rcx }); if (opts.source_stack_base) |reg| self.register_manager.freezeRegs(&.{reg}); defer if (opts.source_stack_base) |reg| self.register_manager.unfreezeRegs(&.{reg}); @@ -5086,7 +5127,6 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - self.register_manager.freezeRegs(&.{dst_addr_reg}); defer self.register_manager.unfreezeRegs(&.{dst_addr_reg}); @@ -5122,7 +5162,6 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr}); }, } - self.register_manager.freezeRegs(&.{src_addr_reg}); defer self.register_manager.unfreezeRegs(&.{src_addr_reg}); @@ -5130,6 +5169,11 @@ fn genInlineMemcpy( const count_reg = regs[0].to64(); const tmp_reg = regs[1].to8(); + self.register_manager.unfreezeRegs(&.{ .rax, .rcx }); + + try self.register_manager.getReg(.rax, null); + try self.register_manager.getReg(.rcx, null); + try self.genSetReg(Type.usize, count_reg, len); // mov rcx, 0 @@ -5225,6 +5269,7 @@ fn genInlineMemcpy( try self.performReloc(loop_reloc); } +/// Spills .rax register. fn genInlineMemset( self: *Self, dst_ptr: MCValue, @@ -5232,9 +5277,7 @@ fn genInlineMemset( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - try self.register_manager.getReg(.rax, null); self.register_manager.freezeRegs(&.{.rax}); - defer self.register_manager.unfreezeRegs(&.{.rax}); const addr_reg = try self.register_manager.allocReg(null); switch (dst_ptr) { @@ -5271,6 +5314,9 @@ fn genInlineMemset( self.register_manager.freezeRegs(&.{addr_reg}); defer self.register_manager.unfreezeRegs(&.{addr_reg}); + self.register_manager.unfreezeRegs(&.{.rax}); + try self.register_manager.getReg(.rax, null); + try self.genSetReg(Type.usize, .rax, len); try self.genBinMathOpMir(.sub, Type.usize, .{ .register = .rax }, .{ .immediate = 1 }); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 8d478f0a64..e8ea10bf29 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -145,6 +145,7 @@ pub fn lowerMir(emit: *Emit) InnerError!void { .sar => try emit.mirShift(.sar, inst), .imul => try emit.mirMulDiv(.imul, inst), + .mul => try emit.mirMulDiv(.mul, inst), .idiv => try emit.mirMulDiv(.idiv, inst), .div => try emit.mirMulDiv(.div, inst), .imul_complex => try emit.mirIMulComplex(inst), @@ -1164,6 +1165,7 @@ const Tag = enum { brk, nop, imul, + mul, idiv, div, syscall, @@ -1411,7 +1413,7 @@ inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) ?OpCode { .setnl, .setge => OpCode.twoByte(0x0f, 0x9d), .setle, .setng => OpCode.twoByte(0x0f, 0x9e), .setnle, .setg => OpCode.twoByte(0x0f, 0x9f), - .idiv, .div, .imul => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), + .idiv, .div, .imul, .mul => OpCode.oneByte(if (is_one_byte) 0xf6 else 0xf7), .fisttp16 => OpCode.oneByte(0xdf), .fisttp32 => OpCode.oneByte(0xdb), .fisttp64 => OpCode.oneByte(0xdd), @@ -1554,9 +1556,10 @@ inline fn getModRmExt(tag: Tag) ?u3 { => 0x4, .shr => 0x5, .sar => 0x7, + .mul => 0x4, .imul => 0x5, - .idiv => 0x7, .div => 0x6, + .idiv => 0x7, .fisttp16 => 0x1, .fisttp32 => 0x1, .fisttp64 => 0x1, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 96a8eec93d..183a76e4b7 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -227,6 +227,7 @@ pub const Inst = struct { /// 0b11 qword ptr [reg2 + imm32] imul, idiv, + mul, div, /// ops flags: form: diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 9fd09ab60a..4afa3ad792 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -370,8 +370,13 @@ pub fn classifySystemV(ty: Type, target: Target) [8]Class { } } -/// These registers need to be preserved (saved on the stack) and restored by the callee before getting clobbered -/// and when the callee returns. -pub const callee_preserved_regs = [_]Register{ .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; +/// Note that .rsp and .rbp also belong to this set, however, we never expect to use them +/// for anything else but stack offset tracking therefore we exclude them from this set. +pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 }; +/// These registers need to be preserved (saved on the stack) and restored by the caller before +/// the caller relinquishes control to a subroutine via call instruction (or similar). +/// In other words, these registers are free to use by the callee. +pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; +pub const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs; pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; |
