| field | value | date |
|---|---|---|
| author | Andrew Kelley <andrew@ziglang.org> | 2023-05-03 15:12:20 -0700 |
| committer | GitHub <noreply@github.com> | 2023-05-03 15:12:20 -0700 |
| commit | f05cd008d89f36da0aaea315dd480edeb7870a45 | |
| tree | bac1c8e73a43ab9a855107776ff3223528426b84 /src/arch | |
| parent | 8d6336420b937075e3363f9548adb0092af7f819 | |
| parent | a19faa2481e84e065a8762cb7c7cbf35426929fd | |
Merge pull request #15559 from jacobly0/x86_64-behavior
x86_64: more behavior
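
This merge brings up more of the self-hosted x86_64 backend: scalar `@sqrt` gets its own `airSqrt` lowering, float negation and `@fabs` are folded into a shared `airFloatSign`, 16-bit floats can now be moved through XMM registers, and `airFieldParentPtr` is implemented. A small hypothetical snippet (not taken from the PR's tests; builtin spellings as of the Zig tree at this commit) of the kind of user code these lowerings cover:

```zig
const std = @import("std");

const Parent = struct { a: u32, b: f64 };

test "operations reaching the new lowerings" {
    var x: f64 = 2.25;
    const root = @sqrt(x); // airSqrt -> sqrtsd for f64
    const neg = -root; // airFloatSign -> xorps against a sign-bit constant
    const mag = @fabs(neg); // airFloatSign -> andnps against a sign-bit constant
    try std.testing.expectEqual(@as(f64, 1.5), mag);

    var p = Parent{ .a = 7, .b = mag };
    // airFieldParentPtr: the parent pointer is the field pointer offset by
    // -field_offset, so no new instructions are needed.
    const q = @fieldParentPtr(Parent, "b", &p.b);
    try std.testing.expectEqual(@as(u32, 7), q.a);
}
```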
Diffstat (limited to 'src/arch')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 312 |
| -rw-r--r-- | src/arch/x86_64/Encoding.zig | 16 |
| -rw-r--r-- | src/arch/x86_64/Lower.zig | 23 |
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 34 |
| -rw-r--r-- | src/arch/x86_64/encodings.zig | 36 |
5 files changed, 323 insertions, 98 deletions
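
Most of the churn is in CodeGen.zig in the diff below. `airNeg` becomes `airFloatSign` and handles both `neg` and `fabs` by operating on the IEEE-754 sign bit with `xorps`/`andnps` against a sign-bit vector constant (the diff's comment notes there is no point spending an extra prefix byte on the equivalent `*pd` forms). A minimal sketch of that bit trick in Zig, with helper names of my own:

```zig
const std = @import("std");

// Negation flips the sign bit; fabs clears it. The backend performs the same
// two operations on a full XMM register with xorps/andnps and a mask constant.
fn negF32(x: f32) f32 {
    const sign_bit: u32 = 0x8000_0000;
    return @bitCast(f32, @bitCast(u32, x) ^ sign_bit);
}

fn fabsF32(x: f32) f32 {
    const sign_bit: u32 = 0x8000_0000;
    return @bitCast(f32, @bitCast(u32, x) & ~sign_bit);
}

test "sign-bit negation and abs" {
    try std.testing.expectEqual(@as(f32, -1.5), negF32(1.5));
    try std.testing.expectEqual(@as(f32, 1.5), fabsF32(-1.5));
}
```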
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index dd093508b1..97e672b71f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -229,6 +229,14 @@ pub const MCValue = union(enum) { fn isRegister(mcv: MCValue) bool { return switch (mcv) { .register => true, + .register_offset => |reg_off| return reg_off.off == 0, + else => false, + }; + } + + fn isRegisterOffset(mcv: MCValue) bool { + return switch (mcv) { + .register, .register_offset => true, else => false, }; } @@ -1202,6 +1210,28 @@ fn asmRegisterMemory(self: *Self, tag: Mir.Inst.Tag, reg: Register, m: Memory) ! }); } +fn asmRegisterMemoryImmediate( + self: *Self, + tag: Mir.Inst.Tag, + reg: Register, + m: Memory, + imm: Immediate, +) !void { + _ = try self.addInst(.{ + .tag = tag, + .ops = switch (m) { + .sib => .rmi_sib, + .rip => .rmi_rip, + else => unreachable, + }, + .data = .{ .rix = .{ .r = reg, .i = @intCast(u8, imm.unsigned), .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + } } }, + }); +} + fn asmMemoryRegister(self: *Self, tag: Mir.Inst.Tag, m: Memory, reg: Register) !void { _ = try self.addInst(.{ .tag = tag, @@ -1442,7 +1472,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .shl_sat => try self.airShlSat(inst), .slice => try self.airSlice(inst), - .sqrt, .sin, .cos, .tan, @@ -1451,14 +1480,14 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .log, .log2, .log10, - .fabs, .floor, .ceil, .round, .trunc_float, => try self.airUnaryMath(inst), - .neg => try self.airNeg(inst), + .sqrt => try self.airSqrt(inst), + .neg, .fabs => try self.airFloatSign(inst), .add_with_overflow => try self.airAddSubWithOverflow(inst), .sub_with_overflow => try self.airAddSubWithOverflow(inst), @@ -1944,7 +1973,7 @@ fn allocRegOrMemAdvanced(self: *Self, elem_ty: Type, inst: ?Air.Inst.Index, reg_ const ptr_bits = self.target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst, try self.regClassForType(elem_ty))) |reg| { + if (self.register_manager.tryAllocReg(inst, regClassForType(elem_ty))) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } } @@ -1954,14 +1983,9 @@ fn allocRegOrMemAdvanced(self: *Self, elem_ty: Type, inst: ?Air.Inst.Index, reg_ return .{ .load_frame = .{ .index = frame_index } }; } -fn regClassForType(self: *Self, ty: Type) !RegisterManager.RegisterBitSet { +fn regClassForType(ty: Type) RegisterManager.RegisterBitSet { return switch (ty.zigTypeTag()) { - .Vector => self.fail("TODO regClassForType for {}", .{ty.fmt(self.bin_file.options.module.?)}), - .Float => switch (ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) sse else gp, - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) sse else gp, - else => gp, - }, + .Float, .Vector => sse, else => gp, }; } @@ -2104,7 +2128,7 @@ pub fn spillRegisters(self: *Self, registers: []const Register) !void { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. 
fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const reg = try self.register_manager.allocReg(null, try self.regClassForType(ty)); + const reg = try self.register_manager.allocReg(null, regClassForType(ty)); try self.genSetReg(reg, ty, mcv); return reg; } @@ -2119,7 +2143,7 @@ fn copyToRegisterWithInstTracking( ty: Type, mcv: MCValue, ) !MCValue { - const reg: Register = try self.register_manager.allocReg(reg_owner, try self.regClassForType(ty)); + const reg: Register = try self.register_manager.allocReg(reg_owner, regClassForType(ty)); try self.genSetReg(reg, ty, mcv); return MCValue{ .register = reg }; } @@ -2152,8 +2176,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { if (dst_ty.floatBits(self.target.*) != 32 or src_ty.floatBits(self.target.*) != 64 or !Target.x86.featureSetHas(self.target.cpu.features, .sse2)) return self.fail("TODO implement airFptrunc from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), - dst_ty.fmt(self.bin_file.options.module.?), + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }); const src_mcv = try self.resolveInst(ty_op.operand); @@ -2175,8 +2198,7 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { if (dst_ty.floatBits(self.target.*) != 64 or src_ty.floatBits(self.target.*) != 32 or !Target.x86.featureSetHas(self.target.cpu.features, .sse2)) return self.fail("TODO implement airFpext from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), - dst_ty.fmt(self.bin_file.options.module.?), + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }); const src_mcv = try self.resolveInst(ty_op.operand); @@ -3502,17 +3524,7 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { defer self.register_manager.unlockReg(offset_reg_lock); const addr_reg = try self.register_manager.allocReg(null, gp); - switch (slice_mcv) { - .load_frame => |frame_addr| try self.asmRegisterMemory( - .mov, - addr_reg.to64(), - Memory.sib(.qword, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }), - ), - else => return self.fail("TODO implement slice_elem_ptr when slice is {}", .{slice_mcv}), - } + try self.genSetReg(addr_reg, Type.usize, slice_mcv); // TODO we could allocate register here, but need to expect addr register and potentially // offset register. try self.genBinOpMir(.add, slice_ptr_field_type, .{ .register = addr_reg }, .{ @@ -4188,7 +4200,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } -fn airNeg(self: *Self, inst: Air.Inst.Index) !void { +fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ty = self.air.typeOf(un_op); const ty_bits = ty.floatBits(self.target.*); @@ -4231,16 +4243,46 @@ fn airNeg(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_mcv.register); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const tag = self.air.instructions.items(.tag)[inst]; try self.genBinOpMir(switch (ty_bits) { - 32 => .xorps, - 64 => .xorpd, - else => return self.fail("TODO implement airNeg for {}", .{ + // No point using an extra prefix byte for *pd which performs the same operation. 
+ 32, 64 => switch (tag) { + .neg => .xorps, + .fabs => .andnps, + else => unreachable, + }, + else => return self.fail("TODO implement airFloatSign for {}", .{ ty.fmt(self.bin_file.options.module.?), }), }, vec_ty, dst_mcv, sign_mcv); return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } +fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const ty = self.air.typeOf(un_op); + + const src_mcv = try self.resolveInst(un_op); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + + try self.genBinOpMir(switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .sqrtss, + 64 => .sqrtsd, + else => return self.fail("TODO implement airSqrt for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + }, + else => return self.fail("TODO implement airSqrt for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + }, ty, dst_mcv, src_mcv); + return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); +} + fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; _ = un_op; @@ -4409,8 +4451,8 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { const ptr_ty = self.air.typeOf(ty_op.operand); const elem_size = elem_ty.abiSize(self.target.*); - const elem_rc = try self.regClassForType(elem_ty); - const ptr_rc = try self.regClassForType(ptr_ty); + const elem_rc = regClassForType(elem_ty); + const ptr_rc = regClassForType(ptr_ty); const ptr_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (elem_size <= 8 and elem_rc.supersetOf(ptr_rc) and @@ -4782,10 +4824,21 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { } fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airFieldParentPtr for {}", .{self.target.cpu.arch}); - //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; + + const inst_ty = self.air.typeOfIndex(inst); + const parent_ty = inst_ty.childType(); + const field_offset = @intCast(i32, parent_ty.structFieldOffset(extra.field_index, self.target.*)); + + const src_mcv = try self.resolveInst(extra.field_ptr); + const dst_mcv = if (src_mcv.isRegisterOffset() and + self.reuseOperand(inst, extra.field_ptr, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, inst_ty, src_mcv); + const result = dst_mcv.offset(-field_offset); + return self.finishAir(inst, result, .{ extra.field_ptr, .none, .none }); } fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue { @@ -5219,8 +5272,7 @@ fn genMulDivBinOp( .mul, .mulwrap => dst_abi_size != src_abi_size and dst_abi_size != src_abi_size * 2, .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_abi_size != src_abi_size, } or src_abi_size > 8) return self.fail("TODO implement genMulDivBinOp from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), - dst_ty.fmt(self.bin_file.options.module.?), + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }); const ty = if (dst_abi_size <= 8) dst_ty else src_ty; const abi_size = if (dst_abi_size <= 8) dst_abi_size 
else src_abi_size; @@ -5520,7 +5572,9 @@ fn genBinOp( }, lhs_ty, dst_mcv, src_mcv), .mul => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { - else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }), + else => return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), .Float => switch (lhs_ty.floatBits(self.target.*)) { 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) .mulss @@ -5723,9 +5777,13 @@ fn genBinOp( .max => .maxsd, else => unreachable, }, - else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }), + else => return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), }, lhs_ty, dst_mcv, src_mcv), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }), + else => return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), }, else => unreachable, @@ -5764,8 +5822,7 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s .Float => { if (!Target.x86.featureSetHas(self.target.cpu.features, .sse)) return self.fail("TODO genBinOpMir for {s} {} without sse", .{ - @tagName(mir_tag), - ty.fmt(self.bin_file.options.module.?), + @tagName(mir_tag), ty.fmt(self.bin_file.options.module.?), }); return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128()); }, @@ -5863,7 +5920,12 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s .load_got, .load_tlv, => { - const addr_reg = try self.copyToTmpRegister(ty, src_mcv.address()); + var ptr_pl = Type.Payload.ElemType{ + .base = .{ .tag = .single_const_pointer }, + .data = ty, + }; + const ptr_ty = Type.initPayload(&ptr_pl.base); + const addr_reg = try self.copyToTmpRegister(ptr_ty, src_mcv.address()); return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg }, }); @@ -7545,10 +7607,11 @@ fn movMirTag(self: *Self, ty: Type) !Mir.Inst.Tag { return switch (ty.zigTypeTag()) { else => .mov, .Float => switch (ty.floatBits(self.target.*)) { - 16 => .mov, - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) .movss else .mov, - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) .movsd else .mov, - else => return self.fail("TODO movMirTag for {}", .{ + 16 => unreachable, // needs special handling + 32 => .movss, + 64 => .movsd, + 128 => .movaps, + else => return self.fail("TODO movMirTag from {}", .{ ty.fmt(self.bin_file.options.module.?), }), }, @@ -7657,8 +7720,17 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }, .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister( if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point)) - try self.movMirTag(ty) + switch (ty.zigTypeTag()) { + else => .mov, + .Float, .Vector => .movaps, + } else switch (abi_size) { + 2 => return try self.asmRegisterRegisterImmediate( + if (dst_reg.class() == .floating_point) .pinsrw else .pextrw, + registerAlias(dst_reg, abi_size), + registerAlias(src_reg, abi_size), + Immediate.u(0), + ), 4 => .movd, 8 => .movq, else => return self.fail( @@ -7669,18 +7741,12 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr 
registerAlias(dst_reg, abi_size), registerAlias(src_reg, abi_size), ), - .register_offset, .indirect, .load_frame, .lea_frame => try self.asmRegisterMemory( - switch (src_mcv) { - .register_offset => |reg_off| switch (reg_off.off) { - 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), - else => .lea, - }, - .indirect, .load_frame => try self.movMirTag(ty), - .lea_frame => .lea, - else => unreachable, - }, - registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { + .register_offset, + .indirect, + .load_frame, + .lea_frame, + => { + const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { .register_offset, .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, .disp = reg_off.off, @@ -7690,20 +7756,51 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .disp = frame_addr.off, }, else => unreachable, - }), - ), + }); + if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) + try self.asmRegisterMemoryImmediate( + .pinsrw, + registerAlias(dst_reg, abi_size), + src_mem, + Immediate.u(0), + ) + else + try self.asmRegisterMemory( + switch (src_mcv) { + .register_offset => |reg_off| switch (reg_off.off) { + 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), + else => .lea, + }, + .indirect, .load_frame => try self.movMirTag(ty), + .lea_frame => .lea, + else => unreachable, + }, + registerAlias(dst_reg, abi_size), + src_mem, + ); + }, .memory, .load_direct, .load_got, .load_tlv => { switch (src_mcv) { - .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| - return self.asmRegisterMemory( - try self.movMirTag(ty), - registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ - .base = .{ .reg = .ds }, - .disp = small_addr, - }), - ), - .load_direct => |sym_index| if (try self.movMirTag(ty) == .mov) { + .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| { + const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ + .base = .{ .reg = .ds }, + .disp = small_addr, + }); + return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) + self.asmRegisterMemoryImmediate( + .pinsrw, + registerAlias(dst_reg, abi_size), + src_mem, + Immediate.u(0), + ) + else + self.asmRegisterMemory( + try self.movMirTag(ty), + registerAlias(dst_reg, abi_size), + src_mem, + ); + }, + .load_direct => |sym_index| if (!ty.isRuntimeFloat()) { const atom_index = try self.owner.getSymbolIndex(self); _ = try self.addInst(.{ .tag = .mov_linker, @@ -7724,11 +7821,22 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); - try self.asmRegisterMemory( - try self.movMirTag(ty), - registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = addr_reg } }), - ); + const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ + .base = .{ .reg = addr_reg }, + }); + if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) + try self.asmRegisterMemoryImmediate( + .pinsrw, + registerAlias(dst_reg, abi_size), + src_mem, + Immediate.u(0), + ) + else + try self.asmRegisterMemory( + try self.movMirTag(ty), + registerAlias(dst_reg, abi_size), + src_mem, + ); }, .lea_direct, .lea_got => |sym_index| { const atom_index = try self.owner.getSymbolIndex(self); @@ -7821,11 +7929,25 @@ fn genSetMem(self: *Self, base: Memory.Base, 
disp: i32, ty: Type, src_mcv: MCVal }, }, .eflags => |cc| try self.asmSetccMemory(Memory.sib(.byte, .{ .base = base, .disp = disp }), cc), - .register => |reg| try self.asmMemoryRegister( - try self.movMirTag(ty), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), - registerAlias(reg, abi_size), - ), + .register => |src_reg| { + const dst_mem = Memory.sib( + Memory.PtrSize.fromSize(abi_size), + .{ .base = base, .disp = disp }, + ); + if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) + try self.asmMemoryRegisterImmediate( + .pextrw, + dst_mem, + registerAlias(src_reg, abi_size), + Immediate.u(0), + ) + else + try self.asmMemoryRegister( + try self.movMirTag(ty), + dst_mem, + registerAlias(src_reg, abi_size), + ); + }, .register_overflow => |ro| { try self.genSetMem( base, @@ -8028,8 +8150,8 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const src_ty = self.air.typeOf(ty_op.operand); const result = result: { - const dst_rc = try self.regClassForType(dst_ty); - const src_rc = try self.regClassForType(src_ty); + const dst_rc = regClassForType(dst_ty); + const src_rc = regClassForType(src_ty); const operand = try self.resolveInst(ty_op.operand); if (dst_rc.supersetOf(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; @@ -8084,8 +8206,7 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { .unsigned => src_bits + 1, }, 32), 8) catch unreachable; if (src_size > 8) return self.fail("TODO implement airIntToFloat from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), - dst_ty.fmt(self.bin_file.options.module.?), + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }); const src_mcv = try self.resolveInst(ty_op.operand); @@ -8098,7 +8219,7 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg); - const dst_reg = try self.register_manager.allocReg(inst, try self.regClassForType(dst_ty)); + const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty)); const dst_mcv = MCValue{ .register = dst_reg }; const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); @@ -8108,19 +8229,16 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { .cvtsi2ss else return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{ - src_ty.fmt(self.bin_file.options.module.?), - dst_ty.fmt(self.bin_file.options.module.?), + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) .cvtsi2sd else return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{ - src_ty.fmt(self.bin_file.options.module.?), - dst_ty.fmt(self.bin_file.options.module.?), + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), else => return self.fail("TODO implement airIntToFloat from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), - dst_ty.fmt(self.bin_file.options.module.?), + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), }, dst_reg.to128(), registerAlias(src_reg, src_size)); @@ -9000,7 +9118,7 @@ fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data; _ = extra; - return 
self.fail("TODO implement airAggregateInit for x86_64", .{}); + return self.fail("TODO implement airUnionInit for x86_64", .{}); //return self.finishAir(inst, result, .{ extra.init, .none, .none }); } diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 5cb7f7a2d9..944fe85458 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -268,23 +268,37 @@ pub const Mnemonic = enum { movd, // SSE addss, + andps, + andnps, cmpss, cvtsi2ss, divss, maxss, minss, - movss, + movaps, movss, movups, mulss, + orps, + pextrw, + pinsrw, + sqrtps, + sqrtss, subss, ucomiss, xorps, // SSE2 addsd, + andpd, + andnpd, //cmpsd, cvtsd2ss, cvtsi2sd, cvtss2sd, divsd, maxsd, minsd, + movapd, movq, //movd, movsd, + movupd, mulsd, + orpd, + sqrtpd, + sqrtsd, subsd, ucomisd, xorpd, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index a961100687..4289cfaf2a 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -94,18 +94,29 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .xor, .addss, + .andnps, + .andps, .cmpss, .cvtsi2ss, .divss, .maxss, .minss, + .movaps, .movss, + .movups, .mulss, + .orps, + .pextrw, + .pinsrw, .roundss, + .sqrtps, + .sqrtss, .subss, .ucomiss, .xorps, .addsd, + .andnpd, + .andpd, .cmpsd, .cvtsd2ss, .cvtsi2sd, @@ -115,7 +126,10 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .minsd, .movsd, .mulsd, + .orpd, .roundsd, + .sqrtpd, + .sqrtsd, .subsd, .ucomisd, .xorpd, @@ -188,6 +202,8 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .mi_rip_u, .lock_mi_sib_u, .lock_mi_rip_u, + .rmi_sib, + .rmi_rip, .mri_sib, .mri_rip, => Immediate.u(i), @@ -202,6 +218,7 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { return lower.mir.resolveFrameLoc(switch (ops) { .rm_sib, .rm_sib_cc, + .rmi_sib, .m_sib, .m_sib_cc, .mi_sib_u, @@ -217,6 +234,7 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { .rm_rip, .rm_rip_cc, + .rmi_rip, .m_rip, .m_rip_cc, .mi_rip_u, @@ -311,6 +329,11 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rx.r }, .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, }, + .rmi_sib, .rmi_rip => &.{ + .{ .reg = inst.data.rix.r }, + .{ .mem = lower.mem(inst.ops, inst.data.rix.payload) }, + .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, + }, .mr_sib, .lock_mr_sib, .mr_rip, .lock_mr_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, .{ .reg = inst.data.rx.r }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index c14338b13d..6b2db1b696 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -168,6 +168,10 @@ pub const Inst = struct { /// Add single precision floating point values addss, + /// Bitwise logical and of packed single precision floating-point values + andps, + /// Bitwise logical and not of packed single precision floating-point values + andnps, /// Compare scalar single-precision floating-point values cmpss, /// Convert doubleword integer to scalar single-precision floating-point value @@ -178,13 +182,27 @@ pub const Inst = struct { maxss, /// Return minimum single-precision floating-point value minss, + /// Move aligned packed single-precision floating-point values + movaps, /// Move scalar single-precision floating-point value movss, + /// Move unaligned packed single-precision floating-point values + movups, /// Multiply scalar single-precision floating-point values mulss, + /// Bitwise logical or of packed single precision 
floating-point values + orps, + /// Extract word + pextrw, + /// Insert word + pinsrw, /// Round scalar single-precision floating-point values roundss, + /// Square root of scalar single precision floating-point value + sqrtps, /// Subtract scalar single-precision floating-point values + sqrtss, + /// Square root of single precision floating-point values subss, /// Unordered compare scalar single-precision floating-point values ucomiss, @@ -192,6 +210,10 @@ pub const Inst = struct { xorps, /// Add double precision floating point values addsd, + /// Bitwise logical and not of packed double precision floating-point values + andnpd, + /// Bitwise logical and of packed double precision floating-point values + andpd, /// Compare scalar double-precision floating-point values cmpsd, /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value @@ -210,8 +232,14 @@ pub const Inst = struct { movsd, /// Multiply scalar double-precision floating-point values mulsd, + /// Bitwise logical or of packed double precision floating-point values + orpd, /// Round scalar double-precision floating-point values roundsd, + /// Square root of double precision floating-point values + sqrtpd, + /// Square root of scalar double precision floating-point value + sqrtsd, /// Subtract scalar double-precision floating-point values subsd, /// Unordered compare scalar double-precision floating-point values @@ -326,6 +354,12 @@ pub const Inst = struct { /// Register, memory (RIP) operands with condition code (CC). /// Uses `rx_cc` payload. rm_rip_cc, + /// Register, memory (SIB), immediate (byte) operands. + /// Uses `rix` payload with extra data of type `MemorySib`. + rmi_sib, + /// Register, memory (RIP), immediate (byte) operands. + /// Uses `rix` payload with extra data of type `MemoryRip`. + rmi_rip, /// Single memory (SIB) operand. /// Uses `payload` with extra data of type `MemorySib`. 
m_sib, diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index ac427c3633..f87a110e99 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -832,6 +832,10 @@ pub const table = [_]Entry{ // SSE .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .sse }, + .{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .sse }, + + .{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .sse }, + .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse }, .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse }, @@ -843,13 +847,24 @@ pub const table = [_]Entry{ .{ .minss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .sse }, + .{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .sse }, + .{ .movaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .sse }, + .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .sse }, .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .sse }, + .{ .movups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .sse }, + .{ .movups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .sse }, + .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .sse }, + .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .sse }, + .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .sse }, + .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .sse }, + .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .sse }, + .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse }, .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .sse }, @@ -857,6 +872,10 @@ pub const table = [_]Entry{ // SSE2 .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 }, + .{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .sse2 }, + + .{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .sse2 }, + .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 }, .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .sse2 }, @@ -872,6 +891,9 @@ pub const table = [_]Entry{ .{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .sse2 }, + .{ .movapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .sse2 }, + .{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .sse2 }, + .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .sse2 }, .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .sse2 }, @@ -881,8 +903,20 @@ pub const table = [_]Entry{ .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .sse2 }, .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .sse2 }, + .{ .movupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .sse2 }, + .{ .movupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .sse2 }, + .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .sse2 }, + .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .sse2 }, + + .{ .pextrw, .mri, &.{ .r16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .sse2 }, + + .{ .pinsrw, .rmi, &.{ .xmm, .rm16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .sse2 }, + + .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .sse2 }, + .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .sse2 }, + .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .sse2 }, .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .sse2 }, @@ -893,6 +927,8 @@ pub const 
table = [_]Entry{ .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .sse2 }, // SSE4.1 + .{ .pextrw, .mri, &.{ .rm16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .sse4_1 }, + .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .sse4_1 }, .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .sse4_1 }, }; |
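
`genSetReg` and `genSetMem` also gain a special case for runtime floats with `floatBits == 16`: SSE2 has no 16-bit scalar float move, so an `f16` is kept in the low word of an XMM register and moved with `pinsrw`/`pextrw` (immediate 0 selects word 0; the new `rmi_sib`/`rmi_rip` Mir forms carry that immediate for the memory-source case). Semantically this is just a bit-preserving move of the 16-bit pattern; a sketch under that assumption, with hypothetical helper names:

```zig
const std = @import("std");

// What the pextrw/pinsrw paths accomplish at the language level: an f16 is
// stored and reloaded as its raw 16-bit pattern, with no format conversion.
fn storeF16(dst: *u16, x: f16) void {
    dst.* = @bitCast(u16, x); // pextrw word 0 of the XMM register to memory
}

fn loadF16(src: *const u16) f16 {
    return @bitCast(f16, src.*); // pinsrw from memory into word 0
}

test "f16 moves preserve the bit pattern" {
    var bits: u16 = undefined;
    storeF16(&bits, 1.5);
    try std.testing.expectEqual(@as(f16, 1.5), loadF16(&bits));
}
```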
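
`airFieldParentPtr` is likewise implemented above without new instructions: the result is the field-pointer MCValue offset by `-field_offset`, taken from `structFieldOffset`. The address arithmetic it relies on, sketched with hypothetical helpers:

```zig
const std = @import("std");

const S = struct { a: u8, b: u32 };

// The parent pointer is the field pointer minus the field's byte offset
// within the parent struct, exactly the -field_offset adjustment above.
fn parentFromField(field_ptr: *u32) *S {
    return @intToPtr(*S, @ptrToInt(field_ptr) - @offsetOf(S, "b"));
}

test "recover parent pointer from field pointer" {
    var s = S{ .a = 1, .b = 2 };
    try std.testing.expectEqual(&s, parentFromField(&s.b));
}
```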
