diff options
| author | Jacob Young <jacobly0@users.noreply.github.com> | 2023-05-08 07:35:31 -0400 |
|---|---|---|
| committer | Jacob Young <jacobly0@users.noreply.github.com> | 2023-05-08 07:36:20 -0400 |
| commit | 1f5aa7747f5710e281cd2190508ce562a4bfd35f (patch) | |
| tree | ff832710e67a7ac1256ed0ced050db411fd23551 /src | |
| parent | ecb5feaf94bf49dc4c180f09c170223d6c1898b3 (diff) | |
| download | zig-1f5aa7747f5710e281cd2190508ce562a4bfd35f.tar.gz zig-1f5aa7747f5710e281cd2190508ce562a4bfd35f.zip | |
x86_64: finish optimizing mir tag usage
Final tag count is 95.
Diffstat (limited to 'src')
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 368 | ||||
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 364 |
2 files changed, 266 insertions, 466 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 147be62e28..2dc1cc8ee4 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2443,7 +2443,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, mat_src_reg.to128(), Immediate.u(0b1_00), @@ -2455,12 +2455,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { } } else if (src_bits == 64 and dst_bits == 32) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ ._, .vcvtsd2ss }, + .{ .v_, .cvtsd2ss }, dst_reg, dst_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vcvtsd2ss }, + .{ .v_, .cvtsd2ss }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -2506,22 +2506,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? else try self.copyToTmpRegister(src_ty, src_mcv); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); switch (dst_bits) { 32 => {}, - 64 => try self.asmRegisterRegisterRegister(.{ ._, .vcvtss2sd }, dst_reg, dst_reg, dst_reg), + 64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg), else => return self.fail("TODO implement airFpext from {} to {}", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), } } else if (src_bits == 32 and dst_bits == 64) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ ._, .vcvtss2sd }, + .{ .v_, .cvtss2sd }, dst_reg, dst_reg, src_mcv.mem(.dword), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vcvtss2sd }, + .{ .v_, .cvtss2sd }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -4678,8 +4678,8 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { try self.genBinOpMir(switch (ty_bits) { // No point using an extra prefix byte for *pd which performs the same operation. 16, 32, 64, 128 => switch (tag) { - .neg => .{ ._, .xorps }, - .fabs => .{ ._, .andnps }, + .neg => .{ ._ps, .xor }, + .fabs => .{ ._ps, .andn }, else => unreachable, }, 80 => return self.fail("TODO implement airFloatSign for {}", .{ @@ -4712,23 +4712,23 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4 const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss }, - 64 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd }, + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .{ ._, .vroundss } else .{ ._, .roundss }, - 2...4 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else .{ ._, .roundps }, - 5...8 => if (self.hasFeature(.avx)) .{ ._, .vroundps } else null, + 1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, + 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .{ ._, .vroundsd } else .{ ._, .roundsd }, - 2 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else .{ ._, .roundpd }, - 3...4 => if (self.hasFeature(.avx)) .{ ._, .vroundpd } else null, + 1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, + 2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round }, + 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null, else => null, }, 16, 80, 128 => null, @@ -4743,8 +4743,8 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4 const abi_size = @intCast(u32, ty.abiSize(self.target.*)); const dst_alias = registerAlias(dst_reg, abi_size); - switch (mir_tag[1]) { - .vroundss, .vroundsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + switch (mir_tag[0]) { + .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( mir_tag, dst_alias, dst_alias, @@ -4799,18 +4799,18 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? else try self.copyToTmpRegister(ty, src_mcv); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, mat_src_reg.to128()); - try self.asmRegisterRegisterRegister(.{ ._, .vsqrtss }, dst_reg, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), ); break :result dst_mcv; } else null, - 32 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss }, - 64 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd }, + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, 80, 128 => null, else => unreachable, }, @@ -4819,7 +4819,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) { 1 => { try self.asmRegisterRegister( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -4827,13 +4827,13 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { try self.copyToTmpRegister(ty, src_mcv)).to128(), ); try self.asmRegisterRegisterRegister( - .{ ._, .vsqrtss }, + .{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -4843,22 +4843,22 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 2...8 => { const wide_reg = registerAlias(dst_reg, abi_size * 2); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, wide_reg, src_mcv.mem(Memory.PtrSize.fromSize( @intCast(u32, @divExact(wide_reg.bitSize(), 16)), )), ) else try self.asmRegisterRegister( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, wide_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? else try self.copyToTmpRegister(ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vsqrtps }, wide_reg, wide_reg); + try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, wide_reg, Immediate.u(0b1_00), @@ -4868,15 +4868,15 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { else => null, } else null, 32 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtss } else .{ ._, .sqrtss }, - 2...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else .{ ._, .sqrtps }, - 5...8 => if (self.hasFeature(.avx)) .{ ._, .vsqrtps } else null, + 1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, + 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => if (self.hasFeature(.avx)) .{ ._, .vsqrtsd } else .{ ._, .sqrtsd }, - 2 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else .{ ._, .sqrtpd }, - 3...4 => if (self.hasFeature(.avx)) .{ ._, .vsqrtpd } else null, + 1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, + 2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt }, + 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null, else => null, }, 80, 128 => null, @@ -4888,8 +4888,8 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{ ty.fmt(self.bin_file.options.module.?), }); - switch (mir_tag[1]) { - .vsqrtss, .vsqrtsd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + switch (mir_tag[0]) { + .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( mir_tag, dst_reg, dst_reg, @@ -6325,13 +6325,13 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( - .{ ._, .vpinsrw }, + .{ .vp_w, .insr }, dst_reg, dst_reg, src_mcv.mem(.word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vpunpcklwd }, + .{ .vp_, .unpcklwd }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -6339,15 +6339,15 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); - try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddss }, - .sub => .{ ._, .vsubss }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss }, - .max => .{ ._, .vmaxss }, - .min => .{ ._, .vmaxss }, + .add => .{ .v_ss, .add }, + .sub => .{ .v_ss, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .max => .{ .v_ss, .max }, + .min => .{ .v_ss, .max }, else => unreachable, }, dst_reg, @@ -6355,7 +6355,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6363,29 +6363,29 @@ fn genBinOp( return dst_mcv; } else null, 32 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss }, + .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss }, + => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, else => unreachable, }, 64 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd }, + .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd }, + => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, else => unreachable, }, 80, 128 => null, @@ -6401,13 +6401,13 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( - .{ ._, .vpinsrw }, + .{ .vp_w, .insr }, dst_reg, dst_reg, src_mcv.mem(.word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vpunpcklwd }, + .{ .vp_, .unpcklwd }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -6415,15 +6415,15 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); - try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddss }, - .sub => .{ ._, .vsubss }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivss }, - .max => .{ ._, .vmaxss }, - .min => .{ ._, .vmaxss }, + .add => .{ .v_ss, .add }, + .sub => .{ .v_ss, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .max => .{ .v_ss, .max }, + .min => .{ .v_ss, .max }, else => unreachable, }, dst_reg, @@ -6431,7 +6431,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6444,12 +6444,12 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( - .{ ._, .vpinsrd }, + .{ .vp_d, .insr }, dst_reg, src_mcv.mem(.dword), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vunpcklps }, + .{ .v_ps, .unpckl }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -6457,20 +6457,20 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); try self.asmRegisterRegisterRegister( - .{ ._, .vmovhlps }, + .{ .v_ps, .movhl }, tmp_reg, dst_reg, dst_reg, ); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddps }, - .sub => .{ ._, .vsubps }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, - .max => .{ ._, .vmaxps }, - .min => .{ ._, .vmaxps }, + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, else => unreachable, }, dst_reg, @@ -6478,7 +6478,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6490,13 +6490,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, tmp_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegister( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -6505,11 +6505,11 @@ fn genBinOp( ); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddps }, - .sub => .{ ._, .vsubps }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, - .max => .{ ._, .vmaxps }, - .min => .{ ._, .vmaxps }, + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, else => unreachable, }, dst_reg, @@ -6517,7 +6517,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg, Immediate.u(0b1_00), @@ -6529,13 +6529,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, dst_reg.to256(), dst_reg); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, tmp_reg, src_mcv.mem(.xword), ) else try self.asmRegisterRegister( - .{ ._, .vcvtph2ps }, + .{ .v_, .cvtph2ps }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -6544,11 +6544,11 @@ fn genBinOp( ); try self.asmRegisterRegisterRegister( switch (air_tag) { - .add => .{ ._, .vaddps }, - .sub => .{ ._, .vsubps }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, - .max => .{ ._, .vmaxps }, - .min => .{ ._, .vmaxps }, + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, else => unreachable, }, dst_reg.to256(), @@ -6556,7 +6556,7 @@ fn genBinOp( tmp_reg, ); try self.asmRegisterRegisterImmediate( - .{ ._, .vcvtps2ph }, + .{ .v_, .cvtps2ph }, dst_reg, dst_reg.to256(), Immediate.u(0b1_00), @@ -6567,76 +6567,76 @@ fn genBinOp( } else null, 32 => switch (lhs_ty.vectorLen()) { 1 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddss } else .{ ._, .addss }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubss } else .{ ._, .subss }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulss } else .{ ._, .mulss }, + .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivss } else .{ ._, .divss }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxss } else .{ ._, .maxss }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminss } else .{ ._, .minss }, + => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, else => unreachable, }, 2...4 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddps } else .{ ._, .addps }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubps } else .{ ._, .subps }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulps } else .{ ._, .mulps }, + .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivps } else .{ ._, .divps }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxps } else .{ ._, .maxps }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminps } else .{ ._, .minps }, + => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min }, else => unreachable, }, 5...8 => if (self.hasFeature(.avx)) switch (air_tag) { - .add => .{ ._, .vaddps }, - .sub => .{ ._, .vsubps }, - .mul => .{ ._, .vmulps }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivps }, - .max => .{ ._, .vmaxps }, - .min => .{ ._, .vminps }, + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .min }, else => unreachable, } else null, else => null, }, 64 => switch (lhs_ty.vectorLen()) { 1 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddsd } else .{ ._, .addsd }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubsd } else .{ ._, .subsd }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulsd } else .{ ._, .mulsd }, + .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivsd } else .{ ._, .divsd }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxsd } else .{ ._, .maxsd }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminsd } else .{ ._, .minsd }, + => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, else => unreachable, }, 2 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ ._, .vaddpd } else .{ ._, .addpd }, - .sub => if (self.hasFeature(.avx)) .{ ._, .vsubpd } else .{ ._, .subpd }, - .mul => if (self.hasFeature(.avx)) .{ ._, .vmulpd } else .{ ._, .mulpd }, + .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul }, .div_float, .div_trunc, .div_floor, .div_exact, - => if (self.hasFeature(.avx)) .{ ._, .vdivpd } else .{ ._, .divpd }, - .max => if (self.hasFeature(.avx)) .{ ._, .vmaxpd } else .{ ._, .maxpd }, - .min => if (self.hasFeature(.avx)) .{ ._, .vminpd } else .{ ._, .minpd }, + => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min }, else => unreachable, }, 3...4 => if (self.hasFeature(.avx)) switch (air_tag) { - .add => .{ ._, .vaddpd }, - .sub => .{ ._, .vsubpd }, - .mul => .{ ._, .vmulpd }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ ._, .vdivpd }, - .max => .{ ._, .vmaxpd }, - .min => .{ ._, .vminpd }, + .add => .{ .v_pd, .add }, + .sub => .{ .v_pd, .sub }, + .mul => .{ .v_pd, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div }, + .max => .{ .v_pd, .max }, + .min => .{ .v_pd, .min }, else => unreachable, } else null, else => null, @@ -7563,13 +7563,13 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { defer self.register_manager.unlockReg(tmp2_lock); if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( - .{ ._, .vpinsrw }, + .{ .vp_w, .insr }, tmp1_reg, dst_reg.to128(), src_mcv.mem(.word), Immediate.u(1), ) else try self.asmRegisterRegisterRegister( - .{ ._, .vpunpcklwd }, + .{ .vp_, .unpcklwd }, tmp1_reg, dst_reg.to128(), (if (src_mcv.isRegister()) @@ -7577,20 +7577,20 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { else try self.copyToTmpRegister(ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ ._, .vcvtph2ps }, tmp1_reg, tmp1_reg); - try self.asmRegisterRegister(.{ ._, .vmovshdup }, tmp2_reg, tmp1_reg); - try self.genBinOpMir(.{ ._, .ucomiss }, ty, tmp1_mcv, tmp2_mcv); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg); + try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv); } else return self.fail("TODO implement airCmp for {}", .{ ty.fmt(self.bin_file.options.module.?), }), 32 => try self.genBinOpMir( - .{ ._, .ucomiss }, + .{ ._ss, .ucomi }, ty, .{ .register = dst_reg }, src_mcv, ), 64 => try self.genBinOpMir( - .{ ._, .ucomisd }, + .{ ._sd, .ucomi }, ty, .{ .register = dst_reg }, src_mcv, @@ -8573,42 +8573,42 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag { else => return .{ ._, .mov }, .Float => switch (ty.floatBits(self.target.*)) { 16 => unreachable, // needs special handling - 32 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss }, - 64 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd }, + 32 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, + 64 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, 128 => return if (self.hasFeature(.avx)) - if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } - else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, else => {}, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 16 => switch (ty.vectorLen()) { 1 => unreachable, // needs special handling - 2 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss }, - 3...4 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd }, + 2 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, + 3...4 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, 5...8 => return if (self.hasFeature(.avx)) - if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } - else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, 9...16 => if (self.hasFeature(.avx)) - return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }, + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, else => {}, }, 32 => switch (ty.vectorLen()) { - 1 => return if (self.hasFeature(.avx)) .{ ._, .vmovss } else .{ ._, .movss }, + 1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, 2...4 => return if (self.hasFeature(.avx)) - if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } - else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, 5...8 => if (self.hasFeature(.avx)) - return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }, + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, else => {}, }, 64 => switch (ty.vectorLen()) { - 1 => return if (self.hasFeature(.avx)) .{ ._, .vmovsd } else .{ ._, .movsd }, + 1 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, 2 => return if (self.hasFeature(.avx)) - if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups } - else if (aligned) .{ ._, .movaps } else .{ ._, .movups }, + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, 3...4 => if (self.hasFeature(.avx)) - return if (aligned) .{ ._, .vmovaps } else .{ ._, .vmovups }, + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, else => {}, }, else => {}, @@ -8724,11 +8724,11 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point)) switch (ty.zigTypeTag()) { else => .{ ._, .mov }, - .Float, .Vector => .{ ._, .movaps }, + .Float, .Vector => .{ ._ps, .mova }, } else switch (abi_size) { 2 => return try self.asmRegisterRegisterImmediate( - if (dst_reg.class() == .floating_point) .{ ._, .pinsrw } else .{ ._, .pextrw }, + if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr }, registerAlias(dst_reg, 4), registerAlias(src_reg, 4), Immediate.u(0), @@ -8761,7 +8761,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmRegisterMemoryImmediate( - .{ ._, .pinsrw }, + .{ .p_w, .insr }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -8794,7 +8794,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) self.asmRegisterMemoryImmediate( - .{ ._, .pinsrw }, + .{ .p_w, .insr }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -8838,7 +8838,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmRegisterMemoryImmediate( - .{ ._, .pinsrw }, + .{ .p_w, .insr }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -8952,7 +8952,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal ); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmMemoryRegisterImmediate( - .{ ._, .pextrw }, + .{ .p_w, .extr }, dst_mem, src_reg.to128(), Immediate.u(0), @@ -9069,7 +9069,7 @@ fn genInlineMemcpyRegisterRegister( try self.asmMemoryRegister( switch (src_reg.class()) { .general_purpose, .segment => .{ ._, .mov }, - .floating_point => .{ ._, .movss }, + .floating_point => .{ ._ss, .mov }, }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }), registerAlias(src_reg, abi_size), @@ -10197,21 +10197,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 })) switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .{ ._, .vfmadd132ss }, - 64 => .{ ._, .vfmadd132sd }, + 32 => .{ .v_ss, .fmadd132 }, + 64 => .{ .v_sd, .fmadd132 }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd132ss }, - 2...8 => .{ ._, .vfmadd132ps }, + 1 => .{ .v_ss, .fmadd132 }, + 2...8 => .{ .v_ps, .fmadd132 }, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd132sd }, - 2...4 => .{ ._, .vfmadd132pd }, + 1 => .{ .v_sd, .fmadd132 }, + 2...4 => .{ .v_pd, .fmadd132 }, else => null, }, 16, 80, 128 => null, @@ -10224,21 +10224,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 })) switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .{ ._, .vfmadd213ss }, - 64 => .{ ._, .vfmadd213sd }, + 32 => .{ .v_ss, .fmadd213 }, + 64 => .{ .v_sd, .fmadd213 }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd213ss }, - 2...8 => .{ ._, .vfmadd213ps }, + 1 => .{ .v_ss, .fmadd213 }, + 2...8 => .{ .v_ps, .fmadd213 }, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd213sd }, - 2...4 => .{ ._, .vfmadd213pd }, + 1 => .{ .v_sd, .fmadd213 }, + 2...4 => .{ .v_pd, .fmadd213 }, else => null, }, 16, 80, 128 => null, @@ -10251,21 +10251,21 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 })) switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .{ ._, .vfmadd231ss }, - 64 => .{ ._, .vfmadd231sd }, + 32 => .{ .v_ss, .fmadd231 }, + 64 => .{ .v_sd, .fmadd231 }, 16, 80, 128 => null, else => unreachable, }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { 32 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd231ss }, - 2...8 => .{ ._, .vfmadd231ps }, + 1 => .{ .v_ss, .fmadd231 }, + 2...8 => .{ .v_ps, .fmadd231 }, else => null, }, 64 => switch (ty.vectorLen()) { - 1 => .{ ._, .vfmadd231sd }, - 2...4 => .{ ._, .vfmadd231pd }, + 1 => .{ .v_sd, .fmadd231 }, + 2...4 => .{ .v_pd, .fmadd231 }, else => null, }, 16, 80, 128 => null, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 6b5e2bded7..0a7b5597b3 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -278,8 +278,14 @@ pub const Inst = struct { /// Add with carry adc, /// Add + /// Add packed single-precision floating-point values + /// Add scalar single-precision floating-point values + /// Add packed double-precision floating-point values + /// Add scalar double-precision floating-point values add, /// Logical and + /// Bitwise logical and of packed single-precision floating-point values + /// Bitwise logical and of packed double-precision floating-point values @"and", /// Bit scan forward bsf, @@ -304,6 +310,8 @@ pub const Inst = struct { cmov, /// Logical compare /// Compare string + /// Compare scalar single-precision floating-point values + /// Compare scalar double-precision floating-point values cmp, /// Compare and exchange /// Compare and exchange bytes @@ -316,6 +324,10 @@ pub const Inst = struct { cwde, /// Unsigned division /// Signed division + /// Divide packed single-precision floating-point values + /// Divide scalar single-precision floating-point values + /// Divide packed double-precision floating-point values + /// Divide scalar double-precision floating-point values div, /// int3, @@ -339,6 +351,8 @@ pub const Inst = struct { mfence, /// Move /// Move data from string to string + /// Move scalar single-precision floating-point value + /// Move scalar double-precision floating-point value /// Move doubleword /// Move quadword mov, @@ -350,6 +364,10 @@ pub const Inst = struct { movzx, /// Multiply /// Signed multiplication + /// Multiply packed single-precision floating-point values + /// Multiply scalar single-precision floating-point values + /// Multiply packed double-precision floating-point values + /// Multiply scalar double-precision floating-point values mul, /// Two's complement negation neg, @@ -358,6 +376,8 @@ pub const Inst = struct { /// One's complement negation not, /// Logical or + /// Bitwise logical or of packed single-precision floating-point values + /// Bitwise logical or of packed double-precision floating-point values @"or", /// Pop pop, @@ -390,6 +410,10 @@ pub const Inst = struct { /// Double precision shift right sh, /// Subtract + /// Subtract packed single-precision floating-point values + /// Subtract scalar single-precision floating-point values + /// Subtract packed double-precision floating-point values + /// Subtract scalar double-precision floating-point values sub, /// Store string sto, @@ -406,145 +430,88 @@ pub const Inst = struct { /// Exchange register/memory with register xchg, /// Logical exclusive-or + /// Bitwise logical xor of packed single-precision floating-point values + /// Bitwise logical xor of packed double-precision floating-point values xor, - /// Add packed single-precision floating-point values - addps, - /// Add scalar single-precision floating-point values - addss, - /// Bitwise logical and of packed single precision floating-point values - andps, - /// Bitwise logical and not of packed single precision floating-point values - andnps, - /// Compare scalar single-precision floating-point values - cmpss, + /// Bitwise logical and not of packed single-precision floating-point values + /// Bitwise logical and not of packed double-precision floating-point values + andn, /// Convert doubleword integer to scalar single-precision floating-point value cvtsi2ss, - /// Divide packed single-precision floating-point values - divps, - /// Divide scalar single-precision floating-point values - divss, /// Maximum of packed single-precision floating-point values - maxps, /// Maximum of scalar single-precision floating-point values - maxss, + /// Maximum of packed double-precision floating-point values + /// Maximum of scalar double-precision floating-point values + max, /// Minimum of packed single-precision floating-point values - minps, /// Minimum of scalar single-precision floating-point values - minss, + /// Minimum of packed double-precision floating-point values + /// Minimum of scalar double-precision floating-point values + min, /// Move aligned packed single-precision floating-point values - movaps, + /// Move aligned packed double-precision floating-point values + mova, /// Move packed single-precision floating-point values high to low - movhlps, - /// Move scalar single-precision floating-point value - movss, + movhl, /// Move unaligned packed single-precision floating-point values - movups, - /// Multiply packed single-precision floating-point values - mulps, - /// Multiply scalar single-precision floating-point values - mulss, - /// Bitwise logical or of packed single precision floating-point values - orps, + /// Move unaligned packed double-precision floating-point values + movu, + /// Extract byte /// Extract word - pextrw, + /// Extract doubleword + /// Extract quadword + extr, + /// Insert byte /// Insert word - pinsrw, + /// Insert doubleword + /// Insert quadword + insr, /// Square root of packed single-precision floating-point values - sqrtps, /// Square root of scalar single-precision floating-point value - sqrtss, - /// Subtract packed single-precision floating-point values - subps, - /// Subtract scalar single-precision floating-point values - subss, + /// Square root of packed double-precision floating-point values + /// Square root of scalar double-precision floating-point value + sqrt, /// Unordered compare scalar single-precision floating-point values - ucomiss, + /// Unordered compare scalar double-precision floating-point values + ucomi, /// Unpack and interleave high packed single-precision floating-point values - unpckhps, + /// Unpack and interleave high packed double-precision floating-point values + unpckh, /// Unpack and interleave low packed single-precision floating-point values - unpcklps, - /// Bitwise logical xor of packed single precision floating-point values - xorps, + /// Unpack and interleave low packed double-precision floating-point values + unpckl, - /// Add packed double-precision floating-point values - addpd, - /// Add scalar double-precision floating-point values - addsd, - /// Bitwise logical and not of packed double precision floating-point values - andnpd, - /// Bitwise logical and of packed double precision floating-point values - andpd, - /// Compare scalar double-precision floating-point values - cmpsd, /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value cvtsd2ss, /// Convert doubleword integer to scalar double-precision floating-point value cvtsi2sd, /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value cvtss2sd, - /// Divide packed double-precision floating-point values - divpd, - /// Divide scalar double-precision floating-point values - divsd, - /// Maximum of packed double-precision floating-point values - maxpd, - /// Maximum of scalar double-precision floating-point values - maxsd, - /// Minimum of packed double-precision floating-point values - minpd, - /// Minimum of scalar double-precision floating-point values - minsd, - /// Move scalar double-precision floating-point value - movsd, - /// Multiply packed double-precision floating-point values - mulpd, - /// Multiply scalar double-precision floating-point values - mulsd, - /// Bitwise logical or of packed double precision floating-point values - orpd, /// Shuffle packed high words - pshufhw, + shufh, /// Shuffle packed low words - pshuflw, + shufl, /// Shift packed data right logical - psrld, /// Shift packed data right logical - psrlq, /// Shift packed data right logical - psrlw, + srl, /// Unpack high data - punpckhbw, + unpckhbw, /// Unpack high data - punpckhdq, + unpckhdq, /// Unpack high data - punpckhqdq, + unpckhqdq, /// Unpack high data - punpckhwd, + unpckhwd, /// Unpack low data - punpcklbw, + unpcklbw, /// Unpack low data - punpckldq, + unpckldq, /// Unpack low data - punpcklqdq, + unpcklqdq, /// Unpack low data - punpcklwd, - /// Square root of double precision floating-point values - sqrtpd, - /// Square root of scalar double precision floating-point value - sqrtsd, - /// Subtract packed double-precision floating-point values - subpd, - /// Subtract scalar double-precision floating-point values - subsd, - /// Unordered compare scalar double-precision floating-point values - ucomisd, - /// Unpack and interleave high packed double-precision floating-point values - unpckhpd, - /// Unpack and interleave low packed double-precision floating-point values - unpcklpd, - /// Bitwise logical xor of packed double precision floating-point values - xorpd, + unpcklwd, /// Replicate double floating-point values movddup, @@ -553,199 +520,32 @@ pub const Inst = struct { /// Replicate single floating-point values movsldup, - /// Extract Byte - pextrb, - /// Extract Doubleword - pextrd, - /// Extract Quadword - pextrq, - /// Insert Byte - pinsrb, - /// Insert Doubleword - pinsrd, - /// Insert Quadword - pinsrq, - /// Round packed double-precision floating-point values - roundpd, /// Round packed single-precision floating-point values - roundps, - /// Round scalar double-precision floating-point value - roundsd, /// Round scalar single-precision floating-point value - roundss, - - /// Add packed double-precision floating-point values - vaddpd, - /// Add packed single-precision floating-point values - vaddps, - /// Add scalar double-precision floating-point values - vaddsd, - /// Add scalar single-precision floating-point values - vaddss, - /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value - vcvtsd2ss, - /// Convert doubleword integer to scalar double-precision floating-point value - vcvtsi2sd, - /// Convert doubleword integer to scalar single-precision floating-point value - vcvtsi2ss, - /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value - vcvtss2sd, - /// Divide packed double-precision floating-point values - vdivpd, - /// Divide packed single-precision floating-point values - vdivps, - /// Divide scalar double-precision floating-point values - vdivsd, - /// Divide scalar single-precision floating-point values - vdivss, - /// Maximum of packed double-precision floating-point values - vmaxpd, - /// Maximum of packed single-precision floating-point values - vmaxps, - /// Maximum of scalar double-precision floating-point values - vmaxsd, - /// Maximum of scalar single-precision floating-point values - vmaxss, - /// Minimum of packed double-precision floating-point values - vminpd, - /// Minimum of packed single-precision floating-point values - vminps, - /// Minimum of scalar double-precision floating-point values - vminsd, - /// Minimum of scalar single-precision floating-point values - vminss, - /// Move aligned packed double-precision floating-point values - vmovapd, - /// Move aligned packed single-precision floating-point values - vmovaps, - /// Move packed single-precision floating-point values high to low - vmovhlps, - /// Replicate double floating-point values - vmovddup, - /// Move or merge scalar double-precision floating-point value - vmovsd, - /// Replicate single floating-point values - vmovshdup, - /// Replicate single floating-point values - vmovsldup, - /// Move or merge scalar single-precision floating-point value - vmovss, - /// Move unaligned packed double-precision floating-point values - vmovupd, - /// Move unaligned packed single-precision floating-point values - vmovups, - /// Multiply packed double-precision floating-point values - vmulpd, - /// Multiply packed single-precision floating-point values - vmulps, - /// Multiply scalar double-precision floating-point values - vmulsd, - /// Multiply scalar single-precision floating-point values - vmulss, - /// Extract Byte - vpextrb, - /// Extract Doubleword - vpextrd, - /// Extract Quadword - vpextrq, - /// Extract word - vpextrw, - /// Insert Byte - vpinsrb, - /// Insert Doubleword - vpinsrd, - /// Insert Quadword - vpinsrq, - /// Insert word - vpinsrw, - /// Shuffle packed high words - vpshufhw, - /// Shuffle packed low words - vpshuflw, - /// Shift packed data right logical - vpsrld, - /// Shift packed data right logical - vpsrlq, - /// Shift packed data right logical - vpsrlw, - /// Unpack high data - vpunpckhbw, - /// Unpack high data - vpunpckhdq, - /// Unpack high data - vpunpckhqdq, - /// Unpack high data - vpunpckhwd, - /// Unpack low data - vpunpcklbw, - /// Unpack low data - vpunpckldq, - /// Unpack low data - vpunpcklqdq, - /// Unpack low data - vpunpcklwd, /// Round packed double-precision floating-point values - vroundpd, - /// Round packed single-precision floating-point values - vroundps, /// Round scalar double-precision floating-point value - vroundsd, - /// Round scalar single-precision floating-point value - vroundss, - /// Square root of packed double-precision floating-point value - vsqrtpd, - /// Square root of packed single-precision floating-point value - vsqrtps, - /// Square root of scalar double-precision floating-point value - vsqrtsd, - /// Square root of scalar single-precision floating-point value - vsqrtss, - /// Subtract packed double-precision floating-point values - vsubpd, - /// Subtract packed single-precision floating-point values - vsubps, - /// Subtract scalar double-precision floating-point values - vsubsd, - /// Subtract scalar single-precision floating-point values - vsubss, - /// Unpack and interleave high packed double-precision floating-point values - vunpckhpd, - /// Unpack and interleave high packed single-precision floating-point values - vunpckhps, - /// Unpack and interleave low packed double-precision floating-point values - vunpcklpd, - /// Unpack and interleave low packed single-precision floating-point values - vunpcklps, + round, /// Convert 16-bit floating-point values to single-precision floating-point values - vcvtph2ps, + cvtph2ps, /// Convert single-precision floating-point values to 16-bit floating-point values - vcvtps2ph, + cvtps2ph, - /// Fused multiply-add of packed double-precision floating-point values - vfmadd132pd, - /// Fused multiply-add of packed double-precision floating-point values - vfmadd213pd, - /// Fused multiply-add of packed double-precision floating-point values - vfmadd231pd, - /// Fused multiply-add of packed single-precision floating-point values - vfmadd132ps, /// Fused multiply-add of packed single-precision floating-point values - vfmadd213ps, - /// Fused multiply-add of packed single-precision floating-point values - vfmadd231ps, - /// Fused multiply-add of scalar double-precision floating-point values - vfmadd132sd, - /// Fused multiply-add of scalar double-precision floating-point values - vfmadd213sd, - /// Fused multiply-add of scalar double-precision floating-point values - vfmadd231sd, /// Fused multiply-add of scalar single-precision floating-point values - vfmadd132ss, + /// Fused multiply-add of packed double-precision floating-point values + /// Fused multiply-add of scalar double-precision floating-point values + fmadd132, + /// Fused multiply-add of packed single-precision floating-point values /// Fused multiply-add of scalar single-precision floating-point values - vfmadd213ss, + /// Fused multiply-add of packed double-precision floating-point values + /// Fused multiply-add of scalar double-precision floating-point values + fmadd213, + /// Fused multiply-add of packed single-precision floating-point values /// Fused multiply-add of scalar single-precision floating-point values - vfmadd231ss, + /// Fused multiply-add of packed double-precision floating-point values + /// Fused multiply-add of scalar double-precision floating-point values + fmadd231, /// A pseudo instruction that requires special lowering. /// This should be the only tag in this enum that doesn't |
