diff options
| author | Jacob Young <jacobly0@users.noreply.github.com> | 2024-02-16 07:59:16 +0100 |
|---|---|---|
| committer | Jacob Young <jacobly0@users.noreply.github.com> | 2024-02-25 11:22:10 +0100 |
| commit | 513c4c145ee163bb178c615a216534ac1f7e9e91 (patch) | |
| tree | 6cd1ae03642e2f260675f44c9669173a48aebb36 /src/arch | |
| parent | 2fcb2f597549edd0b1241cebf98c11efe2f25884 (diff) | |
| download | zig-513c4c145ee163bb178c615a216534ac1f7e9e91.tar.gz zig-513c4c145ee163bb178c615a216534ac1f7e9e91.zip | |
x86_64: fix avx2 `@truncate`
Diffstat (limited to 'src/arch')
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 38 | ||||
| -rw-r--r-- | src/arch/x86_64/Encoding.zig | 3 | ||||
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 6 | ||||
| -rw-r--r-- | src/arch/x86_64/encodings.zig | 6 |
4 files changed, 39 insertions, 14 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index ffc81988fc..214f975e8e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3274,8 +3274,8 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { try self.genCopy(dst_ty, dst_mcv, src_mcv, .{}); break :dst dst_mcv; } else dst: { - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(dst_ty, dst_mcv, src_mcv, .{}); + const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, inst, true); + try self.genCopy(src_ty, dst_mcv, src_mcv, .{}); break :dst dst_mcv; }; @@ -3333,22 +3333,40 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) }, }; - const dst_reg = registerAlias(dst_mcv.getReg().?, src_abi_size); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, src_abi_size); if (self.hasFeature(.avx)) { try self.asmRegisterRegisterMemory( .{ .vp_, .@"and" }, - dst_reg, - dst_reg, + dst_alias, + dst_alias, try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)), ); - try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg); + if (src_abi_size > 16) { + const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); + const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); + defer self.register_manager.unlockReg(temp_lock); + + try self.asmRegisterRegisterImmediate( + .{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract }, + registerAlias(temp_reg, dst_abi_size), + dst_alias, + Immediate.u(1), + ); + try self.asmRegisterRegisterRegister( + mir_tag, + registerAlias(dst_reg, dst_abi_size), + registerAlias(dst_reg, dst_abi_size), + registerAlias(temp_reg, dst_abi_size), + ); + } else try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, dst_alias); } else { try self.asmRegisterMemory( .{ .p_, .@"and" }, - dst_reg, + dst_alias, try 
splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)), ); - try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg); + try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias); } break :result dst_mcv; } @@ -16404,7 +16422,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { }, 65...128 => switch (vector_len) { else => null, - 1...2 => .{ .vp_i128, .broadcast }, + 1...2 => .{ .v_i128, .broadcast }, }, }) orelse break :avx2; @@ -16418,7 +16436,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))), try src_mcv.mem(self, self.memSize(scalar_ty)), ) else { - if (mir_tag[0] == .vp_i128) break :avx2; + if (mir_tag[0] == .v_i128) break :avx2; try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{}); try self.asmRegisterRegister( mir_tag, diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 5aa6d1d4a2..b2de7186c7 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -415,7 +415,8 @@ pub const Mnemonic = enum { vfmadd132sd, vfmadd213sd, vfmadd231sd, vfmadd132ss, vfmadd213ss, vfmadd231ss, // AVX2 - vpbroadcastb, vpbroadcastd, vpbroadcasti128, vpbroadcastq, vpbroadcastw, + vbroadcasti128, vpbroadcastb, vpbroadcastd, vpbroadcastq, vpbroadcastw, + vextracti128, vinserti128, // zig fmt: on }; diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 5df5700004..6cccb34b3e 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -230,6 +230,8 @@ pub const Inst = struct { v_d, /// VEX-Encoded ___ QuadWord v_q, + /// VEX-Encoded ___ Integer Data + v_i128, /// VEX-Encoded Packed ___ vp_, /// VEX-Encoded Packed ___ Byte @@ -242,8 +244,6 @@ pub const Inst = struct { vp_q, /// VEX-Encoded Packed ___ Double Quadword vp_dq, - /// VEX-Encoded Packed ___ Integer Data - vp_i128, /// VEX-Encoded ___ Scalar Single-Precision Values v_ss, /// VEX-Encoded ___ Packed Single-Precision Values @@ -654,6 +654,7 @@ pub const Inst = struct { 
/// Variable blend scalar double-precision floating-point values blendv, /// Extract packed floating-point values + /// Extract packed integer values extract, /// Insert scalar single-precision floating-point value /// Insert packed floating-point values @@ -696,6 +697,7 @@ pub const Inst = struct { sha256rnds2, /// Load with broadcast floating-point data + /// Load integer and broadcast broadcast, /// Convert 16-bit floating-point values to single-precision floating-point values diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 99e0d96a44..8107d5b3b7 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1769,6 +1769,10 @@ pub const table = [_]Entry{ .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 }, .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 }, + .{ .vextracti128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x39 }, 0, .vex_256_w0, .avx2 }, + + .{ .vinserti128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x38 }, 0, .vex_256_w0, .avx2 }, + .{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 }, .{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 }, .{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 }, @@ -1809,7 +1813,7 @@ pub const table = [_]Entry{ .{ .vpbroadcastd, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x58 }, 0, .vex_256_w0, .avx2 }, .{ .vpbroadcastq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_128_w0, .avx2 }, .{ .vpbroadcastq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_256_w0, .avx2 }, - .{ .vpbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 }, + .{ .vbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 }, .{ .vpcmpeqb, 
.rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_256_wig, .avx2 }, .{ .vpcmpeqw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_256_wig, .avx2 }, |
