| field | value | date |
|---|---|---|
| author | Jacob Young <jacobly0@users.noreply.github.com> | 2023-05-14 05:12:46 -0400 |
| committer | Jacob Young <jacobly0@users.noreply.github.com> | 2023-05-15 03:07:51 -0400 |
| commit | 6c6d8d67cfe14c50684c04a579c1e62bf287e8cb | |
| tree | 22be2a1952c3077a0c60c66fb4b2f47684c5bf43 /src | |
| parent | b6d61028508c5b1e1961a124bc17d4d9bda9686f | |
| download | zig-6c6d8d67cfe14c50684c04a579c1e62bf287e8cb.tar.gz, zig-6c6d8d67cfe14c50684c04a579c1e62bf287e8cb.zip | |
x86_64: redo movement, float negation, and `@fabs`
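The subject line is terse, so a word on the underlying technique: float negation and `@fabs` are pure sign-bit operations, and this commit lowers them to bitwise instructions (`pxor`/`pand`, or their VEX forms) against a constant mask built from integer `minInt`/`maxInt` bit patterns. A minimal sketch of the same trick in user-level Zig of this era (two-argument `@bitCast`); illustrative code, not the compiler's:

```zig
const std = @import("std");

// minInt(i64) is the sign bit alone; maxInt(i64) is its complement.
const sign_bit: u64 = @bitCast(u64, @as(i64, std.math.minInt(i64))); // 0x8000...0000
const abs_mask: u64 = @bitCast(u64, @as(i64, std.math.maxInt(i64))); // 0x7fff...ffff

fn negate(x: f64) f64 {
    return @bitCast(f64, @bitCast(u64, x) ^ sign_bit); // flip the sign bit
}

fn fabs(x: f64) f64 {
    return @bitCast(f64, @bitCast(u64, x) & abs_mask); // clear the sign bit
}

test "sign-bit negation and abs" {
    try std.testing.expectEqual(@as(f64, -1.5), negate(1.5));
    try std.testing.expectEqual(@as(f64, 1.5), fabs(-1.5));
    try std.testing.expectEqual(@as(f64, 1.5), fabs(1.5));
}
```

This is exactly why the new `airFloatSign` below builds its vector constants with `vec_ty.minInt(...)` for `.neg` and `vec_ty.maxInt(...)` for `.fabs` instead of the old repeated `-0.0` float payload.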
Diffstat (limited to 'src')

| mode | file | lines |
|---|---|---|
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 357 |
| -rw-r--r-- | src/arch/x86_64/Encoding.zig | 18 |
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 12 |
| -rw-r--r-- | src/arch/x86_64/encodings.zig | 75 |
| -rw-r--r-- | src/type.zig | 12 |
5 files changed, 359 insertions, 115 deletions
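Most of the `CodeGen.zig` churn below ("redo movement") replaces float-flavored moves (`movss`/`movsd`/`movaps`) with width-correct integer-domain moves chosen purely by ABI size and alignment (`movd`, `movq`, `movdqa`/`movdqu`). A simplified sketch of that selection, with mnemonic strings standing in for the `Mir.Inst.FixedTag` pairs the real `moveStrategy` returns (an approximation, not the commit's code):

```zig
const std = @import("std");

// Roughly how the patched moveStrategy picks an SSE/AVX move for an
// integer (or small-aggregate) value of a given byte size.
fn moveMnemonic(abi_size: u32, aligned: bool, has_avx: bool) []const u8 {
    switch (abi_size) {
        1...2 => return if (has_avx) "vpinsrw/vpextrw" else "pinsrw/pextrw",
        3...4 => return if (has_avx) "vmovd" else "movd",
        5...8 => return if (has_avx) "vmovq" else "movq",
        9...16 => {
            if (aligned) return if (has_avx) "vmovdqa" else "movdqa";
            return if (has_avx) "vmovdqu" else "movdqu";
        },
        // 256-bit (ymm) moves require AVX.
        17...32 => return if (aligned) "vmovdqa" else "vmovdqu",
        else => unreachable,
    }
}

test "size-based move selection" {
    try std.testing.expectEqualStrings("movq", moveMnemonic(8, true, false));
    try std.testing.expectEqualStrings("vmovdqu", moveMnemonic(16, false, true));
}
```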
```diff
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index e5c6925596..80f537e046 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -4681,61 +4681,136 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
 }
 
 fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
+    const tag = self.air.instructions.items(.tag)[inst];
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     const ty = self.air.typeOf(un_op);
-    const ty_bits = ty.floatBits(self.target.*);
+    const abi_size: u32 = switch (ty.abiSize(self.target.*)) {
+        1...16 => 16,
+        17...32 => 32,
+        else => return self.fail("TODO implement airFloatSign for {}", .{
+            ty.fmt(self.bin_file.options.module.?),
+        }),
+    };
+    const scalar_bits = ty.scalarType().floatBits(self.target.*);
+
+    const src_mcv = try self.resolveInst(un_op);
+    const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null;
+    defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
+
+    const dst_mcv: MCValue = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
+        src_mcv
+    else if (self.hasFeature(.avx))
+        .{ .register = try self.register_manager.allocReg(inst, sse) }
+    else
+        try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
+    const dst_reg = dst_mcv.getReg().?;
+    const dst_lock = self.register_manager.lockReg(dst_reg);
+    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
 
     var arena = std.heap.ArenaAllocator.init(self.gpa);
     defer arena.deinit();
 
-    const ExpectedContents = union {
-        f16: Value.Payload.Float_16,
-        f32: Value.Payload.Float_32,
-        f64: Value.Payload.Float_64,
-        f80: Value.Payload.Float_80,
-        f128: Value.Payload.Float_128,
+    const ExpectedContents = struct {
+        scalar: union {
+            i64: Value.Payload.I64,
+            big: struct {
+                limbs: [
+                    @max(
+                        std.math.big.int.Managed.default_capacity,
+                        std.math.big.int.calcTwosCompLimbCount(128),
+                    )
+                ]std.math.big.Limb,
+                pl: Value.Payload.BigInt,
+            },
+        },
+        repeated: Value.Payload.SubValue,
     };
     var stack align(@alignOf(ExpectedContents)) =
         std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator());
 
+    var int_pl = Type.Payload.Bits{
+        .base = .{ .tag = .int_signed },
+        .data = scalar_bits,
+    };
     var vec_pl = Type.Payload.Array{
        .base = .{ .tag = .vector },
        .data = .{
-            .len = @divExact(128, ty_bits),
-            .elem_type = ty,
+            .len = @divExact(abi_size * 8, scalar_bits),
+            .elem_type = Type.initPayload(&int_pl.base),
        },
     };
     const vec_ty = Type.initPayload(&vec_pl.base);
-
-    var sign_pl = Value.Payload.SubValue{
-        .base = .{ .tag = .repeated },
-        .data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*),
+    const sign_val = switch (tag) {
+        .neg => try vec_ty.minInt(stack.get(), self.target.*),
+        .fabs => try vec_ty.maxInt(stack.get(), self.target.*),
+        else => unreachable,
     };
-    const sign_val = Value.initPayload(&sign_pl.base);
 
     const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
-
-    const src_mcv = try self.resolveInst(un_op);
-    const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
-        src_mcv
+    const sign_mem = if (sign_mcv.isMemory())
+        sign_mcv.mem(Memory.PtrSize.fromSize(abi_size))
     else
-        try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
-    const dst_lock = self.register_manager.lockReg(dst_mcv.register);
-    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
+            .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) },
+        });
 
-    const tag = self.air.instructions.items(.tag)[inst];
-    try self.genBinOpMir(switch (ty_bits) {
-        // No point using an extra prefix byte for *pd which performs the same operation.
-        16, 32, 64, 128 => switch (tag) {
-            .neg => .{ ._ps, .xor },
-            .fabs => .{ ._ps, .andn },
+    if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory(
+        switch (scalar_bits) {
+            16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) {
+                .neg => .{ .vp_, .xor },
+                .fabs => .{ .vp_, .@"and" },
+                else => unreachable,
+            } else switch (tag) {
+                .neg => .{ .v_ps, .xor },
+                .fabs => .{ .v_ps, .@"and" },
+                else => unreachable,
+            },
+            32 => switch (tag) {
+                .neg => .{ .v_ps, .xor },
+                .fabs => .{ .v_ps, .@"and" },
+                else => unreachable,
+            },
+            64 => switch (tag) {
+                .neg => .{ .v_pd, .xor },
+                .fabs => .{ .v_pd, .@"and" },
+                else => unreachable,
+            },
+            80 => return self.fail("TODO implement airFloatSign for {}", .{
+                ty.fmt(self.bin_file.options.module.?),
+            }),
             else => unreachable,
         },
-        80 => return self.fail("TODO implement airFloatSign for {}", .{
-            ty.fmt(self.bin_file.options.module.?),
-        }),
-        else => unreachable,
-    }, vec_ty, dst_mcv, sign_mcv);
+        registerAlias(dst_reg, abi_size),
+        registerAlias(if (src_mcv.isRegister())
+            src_mcv.getReg().?
+        else
+            try self.copyToTmpRegister(ty, src_mcv), abi_size),
+        sign_mem,
+    ) else try self.asmRegisterMemory(
+        switch (scalar_bits) {
+            16, 128 => switch (tag) {
+                .neg => .{ .p_, .xor },
+                .fabs => .{ .p_, .@"and" },
+                else => unreachable,
+            },
+            32 => switch (tag) {
+                .neg => .{ ._ps, .xor },
+                .fabs => .{ ._ps, .@"and" },
+                else => unreachable,
+            },
+            64 => switch (tag) {
+                .neg => .{ ._pd, .xor },
+                .fabs => .{ ._pd, .@"and" },
+                else => unreachable,
+            },
+            80 => return self.fail("TODO implement airFloatSign for {}", .{
+                ty.fmt(self.bin_file.options.module.?),
+            }),
+            else => unreachable,
+        },
+        registerAlias(dst_reg, abi_size),
+        sign_mem,
+    );
 
     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
 }
@@ -8593,7 +8668,6 @@ const MoveStrategy = union(enum) {
     const InsertExtract = struct {
         insert: Mir.Inst.FixedTag,
         extract: Mir.Inst.FixedTag,
-        imm: Immediate,
    };
 };
 fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
@@ -8603,17 +8677,15 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
         16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
             .insert = .{ .vp_w, .insr },
             .extract = .{ .vp_w, .extr },
-            .imm = Immediate.u(0),
         } } else .{ .insert_extract = .{
             .insert = .{ .p_w, .insr },
             .extract = .{ .p_w, .extr },
-            .imm = Immediate.u(0),
         } },
         32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } },
         64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } },
         128 => return .{ .move = if (self.hasFeature(.avx))
-            if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-        else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+            if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+        else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
         else => {},
     },
     .Vector => switch (ty.childType().zigTypeTag()) {
@@ -8622,101 +8694,120 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
                 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
                     .insert = .{ .vp_b, .insr },
                     .extract = .{ .vp_b, .extr },
-                    .imm = Immediate.u(0),
                 } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
                     .insert = .{ .p_b, .insr },
                     .extract = .{ .p_b, .extr },
-                    .imm = Immediate.u(0),
                 } },
                 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
                     .insert = .{ .vp_w, .insr },
                     .extract = .{ .vp_w, .extr },
-                    .imm = Immediate.u(0),
                 } } else .{ .insert_extract = .{
                     .insert = .{ .p_w, .insr },
                     .extract = .{ .p_w, .extr },
-                    .imm = Immediate.u(0),
                 } },
                 3...4 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_ss, .mov }
+                    .{ .v_d, .mov }
                 else
-                    .{ ._ss, .mov } },
+                    .{ ._d, .mov } },
                 5...8 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_sd, .mov }
+                    .{ .v_q, .mov }
                 else
-                    .{ ._sd, .mov } },
+                    .{ ._q, .mov } },
+                9...16 => return .{ .move = if (self.hasFeature(.avx))
+                    if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+                else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+                17...32 => if (self.hasFeature(.avx))
+                    return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
                 else => {},
             },
             16 => switch (ty.vectorLen()) {
                 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
                     .insert = .{ .vp_w, .insr },
                     .extract = .{ .vp_w, .extr },
-                    .imm = Immediate.u(0),
                 } } else .{ .insert_extract = .{
                     .insert = .{ .p_w, .insr },
                     .extract = .{ .p_w, .extr },
-                    .imm = Immediate.u(0),
                 } },
                 2 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_ss, .mov }
+                    .{ .v_d, .mov }
                 else
-                    .{ ._ss, .mov } },
+                    .{ ._d, .mov } },
                 3...4 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_sd, .mov }
+                    .{ .v_q, .mov }
                 else
-                    .{ ._sd, .mov } },
+                    .{ ._q, .mov } },
                 5...8 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_ps, .mov }
-                else
-                    .{ ._ps, .mov } },
+                    if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+                else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+                9...16 => if (self.hasFeature(.avx))
+                    return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
                 else => {},
             },
             32 => switch (ty.vectorLen()) {
                 1 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_ss, .mov }
+                    .{ .v_d, .mov }
                 else
-                    .{ ._ss, .mov } },
+                    .{ ._d, .mov } },
                 2 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_sd, .mov }
+                    .{ .v_q, .mov }
                 else
-                    .{ ._sd, .mov } },
+                    .{ ._q, .mov } },
                 3...4 => return .{ .move = if (self.hasFeature(.avx))
-                    if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+                    if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+                else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
                 5...8 => if (self.hasFeature(.avx))
-                    return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+                    return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
                 else => {},
             },
             64 => switch (ty.vectorLen()) {
                 1 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_sd, .mov }
+                    .{ .v_q, .mov }
                 else
-                    .{ ._sd, .mov } },
+                    .{ ._q, .mov } },
                 2 => return .{ .move = if (self.hasFeature(.avx))
-                    if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+                    if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+                else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
                 3...4 => if (self.hasFeature(.avx))
-                    return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+                    return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+                else => {},
+            },
+            128 => switch (ty.vectorLen()) {
+                1 => return .{ .move = if (self.hasFeature(.avx))
+                    if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+                else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
+                2 => if (self.hasFeature(.avx))
+                    return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
+                else => {},
+            },
+            256 => switch (ty.vectorLen()) {
+                1 => if (self.hasFeature(.avx))
+                    return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
                 else => {},
            },
            else => {},
        },
        .Float => switch (ty.childType().floatBits(self.target.*)) {
            16 => switch (ty.vectorLen()) {
-                1 => {},
+                1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
+                    .insert = .{ .vp_w, .insr },
+                    .extract = .{ .vp_w, .extr },
+                } } else .{ .insert_extract = .{
+                    .insert = .{ .p_w, .insr },
+                    .extract = .{ .p_w, .extr },
+                } },
                2 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_ss, .mov }
+                    .{ .v_d, .mov }
                else
-                    .{ ._ss, .mov } },
+                    .{ ._d, .mov } },
                3...4 => return .{ .move = if (self.hasFeature(.avx))
-                    .{ .v_sd, .mov }
+                    .{ .v_q, .mov }
                else
-                    .{ ._sd, .mov } },
+                    .{ ._q, .mov } },
                5...8 => return .{ .move = if (self.hasFeature(.avx))
-                    if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+                    if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+                else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
                9...16 => if (self.hasFeature(.avx))
-                    return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+                    return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
                else => {},
            },
            32 => switch (ty.vectorLen()) {
@@ -8741,18 +8832,18 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy {
                else
                    .{ ._sd, .mov } },
                2 => return .{ .move = if (self.hasFeature(.avx))
-                    if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+                    if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
+                else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
                3...4 => if (self.hasFeature(.avx))
-                    return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+                    return .{ .move = if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } },
                else => {},
            },
            128 => switch (ty.vectorLen()) {
                1 => return .{ .move = if (self.hasFeature(.avx))
-                    if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
-                else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
+                    if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
+                else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
                2 => if (self.hasFeature(.avx))
-                    return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } },
+                    return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } },
                else => {},
            },
            else => {},
@@ -8860,29 +8951,69 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
            );
        }
    },
-    .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister(
-        if ((dst_reg.class() == .sse) == (src_reg.class() == .sse))
-            switch (ty.zigTypeTag()) {
-                else => .{ ._, .mov },
-                .Float, .Vector => .{ ._ps, .mova },
-            }
-        else switch (abi_size) {
-            2 => return try self.asmRegisterRegisterImmediate(
-                if (dst_reg.class() == .sse) .{ .p_w, .insr } else .{ .p_w, .extr },
-                registerAlias(dst_reg, 4),
-                registerAlias(src_reg, 4),
-                Immediate.u(0),
+    .register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) {
+        .general_purpose => switch (src_reg.class()) {
+            .general_purpose => try self.asmRegisterRegister(
+                .{ ._, .mov },
+                registerAlias(dst_reg, abi_size),
+                registerAlias(src_reg, abi_size),
            ),
-            4 => .{ ._d, .mov },
-            8 => .{ ._q, .mov },
-            else => return self.fail(
-                "unsupported register copy from {s} to {s}",
-                .{ @tagName(src_reg), @tagName(dst_reg) },
+            .segment => try self.asmRegisterRegister(
+                .{ ._, .mov },
+                registerAlias(dst_reg, abi_size),
+                src_reg,
            ),
+            .sse => try self.asmRegisterRegister(
+                switch (abi_size) {
+                    1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+                    5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+                    else => unreachable,
+                },
+                registerAlias(dst_reg, @max(abi_size, 4)),
+                src_reg.to128(),
+            ),
+            .x87, .mmx => unreachable,
        },
-        registerAlias(dst_reg, abi_size),
-        registerAlias(src_reg, abi_size),
-    ),
+        .segment => try self.asmRegisterRegister(
+            .{ ._, .mov },
+            dst_reg,
+            switch (src_reg.class()) {
+                .general_purpose, .segment => registerAlias(src_reg, abi_size),
+                .sse => try self.copyToTmpRegister(ty, src_mcv),
+                .x87, .mmx => unreachable,
+            },
+        ),
+        .sse => switch (src_reg.class()) {
+            .general_purpose => try self.asmRegisterRegister(
+                switch (abi_size) {
+                    1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
+                    5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
+                    else => unreachable,
+                },
+                dst_reg.to128(),
+                registerAlias(src_reg, @max(abi_size, 4)),
+            ),
+            .segment => try self.genSetReg(
+                dst_reg,
+                ty,
+                .{ .register = try self.copyToTmpRegister(ty, src_mcv) },
+            ),
+            .sse => try self.asmRegisterRegister(
+                switch (ty.scalarType().zigTypeTag()) {
+                    else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
+                    .Float => switch (ty.floatBits(self.target.*)) {
+                        else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa },
+                        32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova },
+                        64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova },
+                    },
+                },
+                registerAlias(dst_reg, abi_size),
+                registerAlias(src_reg, abi_size),
+            ),
+            .x87, .mmx => unreachable,
+        },
+        .x87, .mmx => unreachable,
+    },
    .register_offset,
    .indirect,
    .load_frame,
@@ -8918,14 +9049,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
                    ie.insert,
                    dst_alias,
                    src_mem,
-                    ie.imm,
+                    Immediate.u(0),
                ),
                .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
                    ie.insert,
                    dst_alias,
                    dst_alias,
                    src_mem,
-                    ie.imm,
+                    Immediate.u(0),
                ),
            }
        },
@@ -8947,14 +9078,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
                    ie.insert,
                    dst_alias,
                    src_mem,
-                    ie.imm,
+                    Immediate.u(0),
                ),
                .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
                    ie.insert,
                    dst_alias,
                    dst_alias,
                    src_mem,
-                    ie.imm,
+                    Immediate.u(0),
                ),
            }
        },
@@ -8994,14 +9125,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr
                    ie.insert,
                    dst_alias,
                    src_mem,
-                    ie.imm,
+                    Immediate.u(0),
                ),
                .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate(
                    ie.insert,
                    dst_alias,
                    dst_alias,
                    src_mem,
-                    ie.imm,
+                    Immediate.u(0),
                ),
            }
        },
@@ -9129,7 +9260,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal
                    ie.extract,
                    dst_mem,
                    src_alias,
-                    ie.imm,
+                    Immediate.u(0),
                ),
            }
        },
@@ -10499,7 +10630,7 @@ fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void {
            if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv;
 
            const dst_mcv = try self.allocRegOrMem(inst, true);
-            try self.genCopy(src_ty, dst_mcv, src_mcv);
+            try self.genCopy(union_ty, dst_mcv, src_mcv);
            break :result dst_mcv;
        }
@@ -11000,7 +11131,15 @@ fn registerAlias(reg: Register, size_bytes: u32) Register {
        reg.to64()
    else
        unreachable,
-    .segment, .x87, .mmx => unreachable,
+    .segment => if (size_bytes <= 2)
+        reg
+    else
+        unreachable,
+    .x87 => unreachable,
+    .mmx => if (size_bytes <= 8)
+        reg
+    else
+        unreachable,
    .sse => if (size_bytes <= 16)
        reg.to128()
    else if (size_bytes <= 32)
diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig
index 66a249a3f2..4014947673 100644
--- a/src/arch/x86_64/Encoding.zig
+++ b/src/arch/x86_64/Encoding.zig
@@ -261,7 +261,8 @@ pub const Mnemonic = enum {
     // X87
     fisttp, fld,
     // MMX
-    movd,
+    movd, movq,
+    pand, pandn, por, pxor,
     // SSE
     addps, addss,
     andps,
@@ -293,7 +294,8 @@ pub const Mnemonic = enum {
     maxpd, maxsd,
     minpd, minsd,
     movapd,
-    movq, //movd, movsd,
+    movdqa, movdqu,
+    //movsd,
     movupd,
     mulpd, mulsd,
     orpd,
@@ -316,6 +318,7 @@ pub const Mnemonic = enum {
     roundpd, roundps, roundsd, roundss,
     // AVX
     vaddpd, vaddps, vaddsd, vaddss,
+    vandnpd, vandnps, vandpd, vandps,
     vbroadcastf128, vbroadcastsd, vbroadcastss,
     vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps,
     vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss,
@@ -327,22 +330,31 @@ pub const Mnemonic = enum {
     vmaxpd, vmaxps, vmaxsd, vmaxss,
     vminpd, vminps, vminsd, vminss,
     vmovapd, vmovaps,
-    vmovddup, vmovhlps, vmovlhps,
+    vmovd,
+    vmovddup,
+    vmovdqa, vmovdqu,
+    vmovhlps, vmovlhps,
+    vmovq,
     vmovsd,
     vmovshdup, vmovsldup,
     vmovss,
     vmovupd, vmovups,
     vmulpd, vmulps, vmulsd, vmulss,
+    vorpd, vorps,
+    vpand, vpandn,
     vpextrb, vpextrd, vpextrq, vpextrw,
     vpinsrb, vpinsrd, vpinsrq, vpinsrw,
+    vpor,
     vpshufhw, vpshuflw,
     vpsrld, vpsrlq, vpsrlw,
     vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd,
     vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd,
+    vpxor,
     vroundpd, vroundps, vroundsd, vroundss,
     vshufpd, vshufps,
     vsqrtpd, vsqrtps, vsqrtsd, vsqrtss,
     vsubpd, vsubps, vsubsd, vsubss,
+    vxorpd, vxorps,
     // F16C
     vcvtph2ps, vcvtps2ph,
     // FMA
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index ef8bbe07b3..4d1f59e454 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -236,6 +236,14 @@ pub const Inst = struct {
         /// VEX-Encoded ___
         v_,
+        /// VEX-Encoded ___ Byte
+        v_b,
+        /// VEX-Encoded ___ Word
+        v_w,
+        /// VEX-Encoded ___ Doubleword
+        v_d,
+        /// VEX-Encoded ___ QuadWord
+        v_q,
         /// VEX-Encoded Packed ___
         vp_,
         /// VEX-Encoded Packed ___ Byte
         vp_b,
@@ -526,6 +534,10 @@ pub const Inst = struct {
         cvttps2dq,
         /// Convert with truncation scalar double-precision floating-point value to doubleword integer
         cvttsd2si,
+        /// Move aligned packed integer values
+        movdqa,
+        /// Move unaligned packed integer values
+        movdqu,
         /// Packed interleave shuffle of quadruplets of single-precision floating-point values
         /// Packed interleave shuffle of pairs of double-precision floating-point values
         shuf,
diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig
index 3383315bd6..3e57be61ea 100644
--- a/src/arch/x86_64/encodings.zig
+++ b/src/arch/x86_64/encodings.zig
@@ -970,11 +970,16 @@ pub const table = [_]Entry{
     .{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .none, .sse2 },
 
     .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .none, .sse2 },
-    .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
-    .{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .long, .sse2 },
+    .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 },
     .{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .long, .sse2 },
 
+    .{ .movdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .none, .sse2 },
+    .{ .movdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .none, .sse2 },
+
+    .{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 },
+    .{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 },
+
     .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 },
     .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 },
@@ -987,10 +992,16 @@ pub const table = [_]Entry{
     .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 },
 
+    .{ .pand, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .none, .sse2 },
+
+    .{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 },
+
     .{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 },
 
     .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
 
+    .{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 },
+
     .{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 },
     .{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 },
@@ -1012,6 +1023,8 @@ pub const table = [_]Entry{
     .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 },
     .{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 },
 
+    .{ .pxor, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .none, .sse2 },
+
     .{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 },
 
     .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 },
@@ -1070,6 +1083,18 @@ pub const table = [_]Entry{
     .{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx },
 
+    .{ .vandnpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
+    .{ .vandnpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
+
+    .{ .vandnps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .vex_128_wig, .avx },
+    .{ .vandnps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x55 }, 0, .vex_256_wig, .avx },
+
+    .{ .vandpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
+    .{ .vandpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
+
+    .{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx },
+    .{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx },
+
     .{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx },
     .{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx },
     .{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx },
@@ -1169,13 +1194,31 @@ pub const table = [_]Entry{
     .{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx },
     .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx },
 
+    .{ .vmovd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w0, .avx },
+    .{ .vmovq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w1, .avx },
+    .{ .vmovd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w0, .avx },
+    .{ .vmovq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w1, .avx },
+
     .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
     .{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx },
 
+    .{ .vmovdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
+    .{ .vmovdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
+    .{ .vmovdqa, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
+    .{ .vmovdqa, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
+
+    .{ .vmovdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_128_wig, .avx },
+    .{ .vmovdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_128_wig, .avx },
+    .{ .vmovdqu, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_256_wig, .avx },
+    .{ .vmovdqu, .mr, &.{ .ymm_m256, .ymm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_256_wig, .avx },
+
     .{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx },
     .{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx },
 
+    .{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx },
+    .{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx },
+
     .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
     .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx },
     .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx },
@@ -1212,6 +1255,16 @@ pub const table = [_]Entry{
     .{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx },
 
+    .{ .vorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
+    .{ .vorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
+
+    .{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
+    .{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
+
+    .{ .vpand, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_128_wig, .avx },
+
+    .{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx },
+
     .{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx },
     .{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
     .{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
@@ -1225,6 +1278,8 @@ pub const table = [_]Entry{
     .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx },
 
+    .{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
+
     .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
     .{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx },
     .{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx },
@@ -1242,6 +1297,8 @@ pub const table = [_]Entry{
     .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx },
     .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx },
 
+    .{ .vpxor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_128_wig, .avx },
+
     .{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx },
     .{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx },
@@ -1278,6 +1335,12 @@ pub const table = [_]Entry{
     .{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
 
+    .{ .vxorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
+    .{ .vxorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
+
+    .{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
+    .{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
+
     // F16C
     .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
     .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
@@ -1313,6 +1376,12 @@ pub const table = [_]Entry{
     .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
     .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
 
+    .{ .vpand, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_256_wig, .avx2 },
+
+    .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
+
+    .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
+
     .{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
     .{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
     .{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
@@ -1329,5 +1398,7 @@ pub const table = [_]Entry{
     .{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 },
     .{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 },
     .{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 },
+
+    .{ .vpxor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_256_wig, .avx2 },
 };
 // zig fmt: on
diff --git a/src/type.zig b/src/type.zig
index 6122afda62..bcbb9e2ea2 100644
--- a/src/type.zig
+++ b/src/type.zig
@@ -5433,8 +5433,18 @@ pub const Type = extern union {
         }
     }
 
+    // Works for vectors and vectors of integers.
+    pub fn maxInt(ty: Type, arena: Allocator, target: Target) !Value {
+        const scalar = try maxIntScalar(ty.scalarType(), arena, target);
+        if (ty.zigTypeTag() == .Vector and scalar.tag() != .the_only_possible_value) {
+            return Value.Tag.repeated.create(arena, scalar);
+        } else {
+            return scalar;
+        }
+    }
+
     /// Asserts that self.zigTypeTag() == .Int.
-    pub fn maxInt(self: Type, arena: Allocator, target: Target) !Value {
+    pub fn maxIntScalar(self: Type, arena: Allocator, target: Target) !Value {
         assert(self.zigTypeTag() == .Int);
         const info = self.intInfo(target);
```
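The `type.zig` hunk is the enabler for the new masks: `maxInt` now accepts vector types and wraps the old scalar logic (renamed `maxIntScalar`) in a `repeated` value, which `airFloatSign` materializes as its constant operand. User code that exercises the rewritten vector path looks like this (uses this era's `@fabs` builtin, later renamed `@abs`; a hypothetical test, not part of the commit):

```zig
const std = @import("std");

// A vector @fabs now lowers to a single pand/vandps against a
// repeated maxInt mask instead of scalarized float operations.
fn vabs(v: @Vector(4, f32)) @Vector(4, f32) {
    return @fabs(v);
}

test "vector @fabs clears every sign bit" {
    const v: @Vector(4, f32) = .{ -1.0, 2.0, -3.0, 4.0 };
    const expected: @Vector(4, f32) = .{ 1.0, 2.0, 3.0, 4.0 };
    try std.testing.expect(@reduce(.And, vabs(v) == expected));
}
```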
