Diffstat (limited to 'src')
 src/arch/x86_64/CodeGen.zig   | 407
 src/arch/x86_64/Emit.zig      |  12
 src/arch/x86_64/Encoding.zig  |  14
 src/arch/x86_64/Lower.zig     |  46
 src/arch/x86_64/Mir.zig       |  78
 src/arch/x86_64/bits.zig      |  20
 src/arch/x86_64/encodings.zig |  98
7 files changed, 551 insertions, 124 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 02c7aaf20f..f2626b9a9a 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -656,11 +656,14 @@ const InstTracking = struct { fn reuse( self: *InstTracking, function: *Self, - new_inst: Air.Inst.Index, + new_inst: ?Air.Inst.Index, old_inst: Air.Inst.Index, ) void { self.short = .{ .dead = function.scope_generation }; - tracking_log.debug("%{d} => {} (reuse %{d})", .{ new_inst, self.*, old_inst }); + if (new_inst) |inst| + tracking_log.debug("%{d} => {} (reuse %{d})", .{ inst, self.*, old_inst }) + else + tracking_log.debug("tmp => {} (reuse %{d})", .{ self.*, old_inst }); } fn liveOut(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void { @@ -1560,24 +1563,58 @@ fn asmRegisterMemoryImmediate( m: Memory, imm: Immediate, ) !void { - _ = try self.addInst(.{ - .tag = tag[1], - .ops = switch (m) { - .sib => .rmi_sib, - .rip => .rmi_rip, + if (switch (imm) { + .signed => |s| if (math.cast(i16, s)) |x| @as(u16, @bitCast(x)) else null, + .unsigned => |u| math.cast(u16, u), + }) |small_imm| { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = switch (m) { + .sib => .rmi_sib, + .rip => .rmi_rip, + else => unreachable, + }, + .data = .{ .rix = .{ + .fixes = tag[0], + .r1 = reg, + .i = small_imm, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, + }); + } else { + const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) { + .signed => |s| @bitCast(s), + .unsigned => unreachable, + } }); + assert(payload + 1 == switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), else => unreachable, - }, - .data = .{ .rix = .{ - .fixes = tag[0], - .r1 = reg, - .i = @as(u8, @intCast(imm.unsigned)), - .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + }); + _ = try self.addInst(.{ + .tag = tag[1], + .ops = switch (m) { + .sib => switch (imm) { + .signed => .rmi_sib_s, + .unsigned => .rmi_sib_u, + }, + .rip => switch (imm) { + .signed => .rmi_rip_s, + .unsigned => .rmi_rip_u, + }, else => unreachable, }, - } }, - }); + .data = .{ .rx = .{ + .fixes = tag[0], + .r1 = reg, + .payload = payload, + } }, + }); + } } fn asmRegisterRegisterMemoryImmediate( @@ -3713,14 +3750,22 @@ fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue else => unreachable, .mul => {}, .div => switch (tag[0]) { - ._ => try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx), - .i_ => switch (self.regBitSize(ty)) { - 8 => try self.asmOpOnly(.{ ._, .cbw }), - 16 => try self.asmOpOnly(.{ ._, .cwd }), - 32 => try self.asmOpOnly(.{ ._, .cdq }), - 64 => try self.asmOpOnly(.{ ._, .cqo }), - else => unreachable, + ._ => { + const hi_reg: Register = + switch (self.regBitSize(ty)) { + 8 => .ah, + 16, 32, 64 => .edx, + else => unreachable, + }; + try self.asmRegisterRegister(.{ ._, .xor }, hi_reg, hi_reg); }, + .i_ => try self.asmOpOnly(.{ ._, switch (self.regBitSize(ty)) { + 8 => .cbw, + 16 => .cwd, + 32 => .cdq, + 64 => .cqo, + else => unreachable, + } }), else => unreachable, }, } @@ -5210,13 +5255,11 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) .child = (try mod.intType(.signed, scalar_bits)).ip_index, }); - const sign_val = switch (tag) { + const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = 
switch (tag) { .neg => try vec_ty.minInt(mod, vec_ty), .abs => try vec_ty.maxInt(mod, vec_ty), else => unreachable, - }; - - const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val }); + } }); const sign_mem = if (sign_mcv.isMemory()) sign_mcv.mem(Memory.PtrSize.fromSize(abi_size)) else @@ -5285,7 +5328,6 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ty = self.typeOf(un_op); - return self.floatSign(inst, un_op, ty); } @@ -5782,7 +5824,7 @@ fn reuseOperandAdvanced( operand: Air.Inst.Ref, op_index: Liveness.OperandInt, mcv: MCValue, - tracked_inst: Air.Inst.Index, + maybe_tracked_inst: ?Air.Inst.Index, ) bool { if (!self.liveness.operandDies(inst, op_index)) return false; @@ -5791,11 +5833,13 @@ fn reuseOperandAdvanced( .register, .register_pair => for (mcv.getRegs()) |reg| { // If it's in the registers table, need to associate the register(s) with the // new instruction. - if (!self.register_manager.isRegFree(reg)) { - if (RegisterManager.indexOfRegIntoTracked(reg)) |index| { - self.register_manager.registers[index] = tracked_inst; + if (maybe_tracked_inst) |tracked_inst| { + if (!self.register_manager.isRegFree(reg)) { + if (RegisterManager.indexOfRegIntoTracked(reg)) |index| { + self.register_manager.registers[index] = tracked_inst; + } } - } + } else self.register_manager.freeReg(reg); }, .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false, else => return false, @@ -5804,7 +5848,7 @@ fn reuseOperandAdvanced( // Prevent the operand deaths processing code from deallocating it. self.liveness.clearOperandDeath(inst, op_index); const op_inst = Air.refToIndex(operand).?; - self.getResolvedInstValue(op_inst).reuse(self, tracked_inst, op_inst); + self.getResolvedInstValue(op_inst).reuse(self, maybe_tracked_inst, op_inst); return true; } @@ -7234,12 +7278,18 @@ fn genBinOp( if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null; defer if (mask_lock) |lock| self.register_manager.unlockReg(lock); - const lhs_mcv = try self.resolveInst(lhs_air); - const rhs_mcv = try self.resolveInst(rhs_air); + const ordered_air = if (lhs_ty.isVector(mod) and lhs_ty.childType(mod).isAbiInt(mod) and + switch (air_tag) { + .cmp_lt, .cmp_gte => true, + else => false, + }) .{ .lhs = rhs_air, .rhs = lhs_air } else .{ .lhs = lhs_air, .rhs = rhs_air }; + + const lhs_mcv = try self.resolveInst(ordered_air.lhs); + const rhs_mcv = try self.resolveInst(ordered_air.rhs); switch (lhs_mcv) { .immediate => |imm| switch (imm) { 0 => switch (air_tag) { - .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, rhs_air), + .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, ordered_air.rhs), else => {}, }, else => {}, @@ -7288,11 +7338,15 @@ fn genBinOp( var copied_to_dst = true; const dst_mcv: MCValue = dst: { if (maybe_inst) |inst| { - if ((!vec_op or lhs_mcv.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs_mcv)) { + const tracked_inst = switch (air_tag) { + else => inst, + .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null, + }; + if ((!vec_op or lhs_mcv.isRegister()) and + self.reuseOperandAdvanced(inst, ordered_air.lhs, 0, lhs_mcv, tracked_inst)) break :dst lhs_mcv; - } if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and - self.reuseOperand(inst, rhs_air, 1, rhs_mcv)) + self.reuseOperandAdvanced(inst, ordered_air.rhs, 1, rhs_mcv, tracked_inst)) { flipped = 
true; break :dst rhs_mcv; @@ -7657,7 +7711,10 @@ fn genBinOp( .sub, .sub_wrap, => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub }, - .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_and => if (self.hasFeature(.avx)) + .{ .vp_, .@"and" } + else + .{ .p_, .@"and" }, .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { @@ -7688,6 +7745,20 @@ fn genBinOp( else null, }, + .cmp_lt, + .cmp_lte, + .cmp_gte, + .cmp_gt, + => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_b, .cmpgt } + else + .{ .p_b, .cmpgt }, + .unsigned => null, + }, + .cmp_eq, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else .{ .p_b, .cmpeq }, else => null, }, 17...32 => switch (air_tag) { @@ -7708,6 +7779,17 @@ fn genBinOp( .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null, .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null, }, + .cmp_lt, + .cmp_lte, + .cmp_gte, + .cmp_gt, + => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) .{ .vp_b, .cmpgt } else null, + .unsigned => null, + }, + .cmp_eq, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else null, else => null, }, else => null, @@ -7723,7 +7805,10 @@ fn genBinOp( .mul, .mul_wrap, => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull }, - .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_and => if (self.hasFeature(.avx)) + .{ .vp_, .@"and" } + else + .{ .p_, .@"and" }, .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { @@ -7746,6 +7831,20 @@ fn genBinOp( else .{ .p_w, .maxu }, }, + .cmp_lt, + .cmp_lte, + .cmp_gte, + .cmp_gt, + => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_w, .cmpgt } + else + .{ .p_w, .cmpgt }, + .unsigned => null, + }, + .cmp_eq, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else .{ .p_w, .cmpeq }, else => null, }, 9...16 => switch (air_tag) { @@ -7769,6 +7868,17 @@ fn genBinOp( .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null, .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null, }, + .cmp_lt, + .cmp_lte, + .cmp_gte, + .cmp_gt, + => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) .{ .vp_w, .cmpgt } else null, + .unsigned => null, + }, + .cmp_eq, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else null, else => null, }, else => null, @@ -7789,7 +7899,10 @@ fn genBinOp( .{ .p_d, .mull } else null, - .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_and => if (self.hasFeature(.avx)) + .{ .vp_, .@"and" } + else + .{ .p_, .@"and" }, .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { @@ -7820,6 +7933,20 @@ fn genBinOp( else null, }, + .cmp_lt, + .cmp_lte, + .cmp_gte, + .cmp_gt, + => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ 
.vp_d, .cmpgt } + else + .{ .p_d, .cmpgt }, + .unsigned => null, + }, + .cmp_eq, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else .{ .p_d, .cmpeq }, else => null, }, 5...8 => switch (air_tag) { @@ -7843,6 +7970,17 @@ fn genBinOp( .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null, .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null, }, + .cmp_lt, + .cmp_lte, + .cmp_gte, + .cmp_gt, + => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null, + .unsigned => null, + }, + .cmp_eq, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null, else => null, }, else => null, @@ -7855,9 +7993,33 @@ fn genBinOp( .sub, .sub_wrap, => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub }, - .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_and => if (self.hasFeature(.avx)) + .{ .vp_, .@"and" } + else + .{ .p_, .@"and" }, .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, + .cmp_lt, + .cmp_lte, + .cmp_gte, + .cmp_gt, + => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_q, .cmpgt } + else if (self.hasFeature(.sse4_2)) + .{ .p_q, .cmpgt } + else + null, + .unsigned => null, + }, + .cmp_eq, + .cmp_neq, + => if (self.hasFeature(.avx)) + .{ .vp_q, .cmpeq } + else if (self.hasFeature(.sse4_1)) + .{ .p_q, .cmpeq } + else + null, else => null, }, 3...4 => switch (air_tag) { @@ -7870,6 +8032,17 @@ fn genBinOp( .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, + .cmp_eq, + .cmp_neq, + => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null, + .cmp_lt, + .cmp_lte, + .cmp_gt, + .cmp_gte, + => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null, + .unsigned => null, + }, else => null, }, else => null, @@ -8435,6 +8608,62 @@ fn genBinOp( ); } }, + .cmp_lt, + .cmp_lte, + .cmp_eq, + .cmp_gte, + .cmp_gt, + .cmp_neq, + => { + switch (air_tag) { + .cmp_lt, + .cmp_eq, + .cmp_gt, + => {}, + .cmp_lte, + .cmp_gte, + .cmp_neq, + => { + const unsigned_ty = try lhs_ty.toUnsigned(mod); + const not_mcv = try self.genTypedValue(.{ + .ty = lhs_ty, + .val = try unsigned_ty.maxInt(mod, unsigned_ty), + }); + const not_mem = if (not_mcv.isMemory()) + not_mcv.mem(Memory.PtrSize.fromSize(abi_size)) + else + Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ + .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()), + } }); + switch (mir_tag[0]) { + .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory( + .{ .vp_, .xor }, + dst_reg, + dst_reg, + not_mem, + ), + .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory( + .{ .p_, .xor }, + dst_reg, + not_mem, + ), + else => unreachable, + } + }, + else => unreachable, + } + + const gp_reg = try self.register_manager.allocReg(maybe_inst, abi.RegisterClass.gp); + const gp_lock = self.register_manager.lockRegAssumeUnused(gp_reg); + defer self.register_manager.unlockReg(gp_lock); + + try self.asmRegisterRegister(switch (mir_tag[0]) { + .vp_b, .vp_d, .vp_q, .vp_w => .{ .vp_b, .movmsk }, + .p_b, .p_d, .p_q, .p_w => .{ .p_b, .movmsk }, + else => unreachable, + }, gp_reg.to32(), dst_reg); + return .{ .register = 
gp_reg }; + }, else => unreachable, } @@ -9741,8 +9970,15 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { } fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void { - _ = inst; - return self.fail("TODO implement airCmpVector for {}", .{self.target.cpu.arch}); + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data; + const dst_mcv = try self.genBinOp( + inst, + Air.Inst.Tag.fromCmpOp(extra.compareOperator(), false), + extra.lhs, + extra.rhs, + ); + return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none }); } fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { @@ -12592,7 +12828,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { .{ .i_, .mul }, len_reg, len_reg, - Immediate.u(elem_abi_size), + Immediate.s(elem_abi_size), ); try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv); @@ -12645,8 +12881,23 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { defer if (src_ptr_lock) |lock| self.register_manager.unlockReg(lock); const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) { - .Slice => dst_ptr.address().offset(8).deref(), - .One => .{ .immediate = dst_ptr_ty.childType(mod).arrayLen(mod) }, + .Slice => len: { + const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const len_lock = self.register_manager.lockRegAssumeUnused(len_reg); + defer self.register_manager.unlockReg(len_lock); + + try self.asmRegisterMemoryImmediate( + .{ .i_, .mul }, + len_reg, + dst_ptr.address().offset(8).deref().mem(.qword), + Immediate.s(@intCast(dst_ptr_ty.childType(mod).abiSize(mod))), + ); + break :len .{ .register = len_reg }; + }, + .One => len: { + const array_ty = dst_ptr_ty.childType(mod); + break :len .{ .immediate = array_ty.arrayLen(mod) * array_ty.childType(mod).abiSize(mod) }; + }, .C, .Many => unreachable, }; const len_lock: ?RegisterLock = switch (len) { @@ -12999,10 +13250,60 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { } fn airReduce(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const reduce = self.air.instructions.items(.data)[inst].reduce; - _ = reduce; - return self.fail("TODO implement airReduce for x86_64", .{}); - //return self.finishAir(inst, result, .{ reduce.operand, .none, .none }); + + const result: MCValue = result: { + const operand_ty = self.typeOf(reduce.operand); + if (operand_ty.isVector(mod) and operand_ty.childType(mod).toIntern() == .bool_type) { + try self.spillEflagsIfOccupied(); + + const operand_mcv = try self.resolveInst(reduce.operand); + const mask_len = (std.math.cast(u6, operand_ty.vectorLen(mod)) orelse + return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)})); + const mask = (@as(u64, 1) << mask_len) - 1; + const abi_size: u32 = @intCast(operand_ty.abiSize(mod)); + switch (reduce.operation) { + .Or => { + if (operand_mcv.isMemory()) try self.asmMemoryImmediate( + .{ ._, .@"test" }, + operand_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + Immediate.u(mask), + ) else { + const operand_reg = registerAlias(if (operand_mcv.isRegister()) + operand_mcv.getReg().? 
+ else + try self.copyToTmpRegister(operand_ty, operand_mcv), abi_size); + if (mask_len < abi_size * 8) try self.asmRegisterImmediate( + .{ ._, .@"test" }, + operand_reg, + Immediate.u(mask), + ) else try self.asmRegisterRegister( + .{ ._, .@"test" }, + operand_reg, + operand_reg, + ); + } + break :result .{ .eflags = .nz }; + }, + .And => { + const tmp_reg = try self.copyToTmpRegister(operand_ty, operand_mcv); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegister(.{ ._, .not }, tmp_reg); + if (mask_len < abi_size * 8) + try self.asmRegisterImmediate(.{ ._, .@"test" }, tmp_reg, Immediate.u(mask)) + else + try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_reg, tmp_reg); + break :result .{ .eflags = .z }; + }, + else => return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)}), + } + } + return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)}); + }; + return self.finishAir(inst, result, .{ reduce.operand, .none, .none }); } fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index e03b0f01b5..ea00a0b627 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -19,18 +19,18 @@ pub const Error = Lower.Error || error{ pub fn emitMir(emit: *Emit) Error!void { for (0..emit.lower.mir.instructions.len) |mir_i| { - const mir_index = @as(Mir.Inst.Index, @intCast(mir_i)); + const mir_index: Mir.Inst.Index = @intCast(mir_i); try emit.code_offset_mapping.putNoClobber( emit.lower.allocator, mir_index, - @as(u32, @intCast(emit.code.items.len)), + @intCast(emit.code.items.len), ); const lowered = try emit.lower.lowerMir(mir_index); var lowered_relocs = lowered.relocs; for (lowered.insts, 0..) |lowered_inst, lowered_index| { - const start_offset = @as(u32, @intCast(emit.code.items.len)); + const start_offset: u32 = @intCast(emit.code.items.len); try lowered_inst.encode(emit.code.writer(), .{}); - const end_offset = @as(u32, @intCast(emit.code.items.len)); + const end_offset: u32 = @intCast(emit.code.items.len); while (lowered_relocs.len > 0 and lowered_relocs[0].lowered_inst_index == lowered_index) : ({ lowered_relocs = lowered_relocs[1..]; @@ -39,7 +39,7 @@ pub fn emitMir(emit: *Emit) Error!void { .source = start_offset, .target = target, .offset = end_offset - 4, - .length = @as(u5, @intCast(end_offset - start_offset)), + .length = @intCast(end_offset - start_offset), }), .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.Elf)) |elf_file| { // Add relocation to the decl. @@ -220,7 +220,7 @@ const Reloc = struct { /// Target of the relocation. target: Mir.Inst.Index, /// Offset of the relocation within the instruction. - offset: usize, + offset: u32, /// Length of the instruction. 
length: u5, }; diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 3ef835aa18..e18c7da974 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -266,6 +266,8 @@ pub const Mnemonic = enum { packssdw, packsswb, packuswb, paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw, pand, pandn, por, pxor, + pcmpeqb, pcmpeqd, pcmpeqw, + pcmpgtb, pcmpgtd, pcmpgtw, pmulhw, pmullw, psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw, // SSE @@ -278,11 +280,12 @@ pub const Mnemonic = enum { maxps, maxss, minps, minss, movaps, movhlps, movlhps, + movmskps, movss, movups, mulps, mulss, orps, pextrw, pinsrw, - pmaxsw, pmaxub, pminsw, pminub, + pmaxsw, pmaxub, pminsw, pminub, pmovmskb, shufps, sqrtps, sqrtss, subps, subss, @@ -301,6 +304,7 @@ pub const Mnemonic = enum { minpd, minsd, movapd, movdqa, movdqu, + movmskpd, //movsd, movupd, mulpd, mulsd, @@ -323,11 +327,14 @@ pub const Mnemonic = enum { extractps, insertps, packusdw, + pcmpeqq, pextrb, pextrd, pextrq, pinsrb, pinsrd, pinsrq, pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw, pmulld, roundpd, roundps, roundsd, roundss, + // SSE4.2 + pcmpgtq, // AVX vaddpd, vaddps, vaddsd, vaddss, vandnpd, vandnps, vandpd, vandps, @@ -348,6 +355,7 @@ pub const Mnemonic = enum { vmovddup, vmovdqa, vmovdqu, vmovhlps, vmovlhps, + vmovmskpd, vmovmskps, vmovq, vmovsd, vmovshdup, vmovsldup, @@ -359,10 +367,13 @@ pub const Mnemonic = enum { vpackssdw, vpacksswb, vpackusdw, vpackuswb, vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw, vpand, vpandn, + vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw, + vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw, vpextrb, vpextrd, vpextrq, vpextrw, vpinsrb, vpinsrd, vpinsrq, vpinsrw, vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw, vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw, + vpmovmskb, vpmulhw, vpmulld, vpmullw, vpor, vpshufhw, vpshuflw, @@ -754,6 +765,7 @@ pub const Feature = enum { sse2, sse3, sse4_1, + sse4_2, ssse3, x87, }; diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index ae5f86d6b0..5ac3c3a72c 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -190,7 +190,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_probe_align_ri_s => { try lower.emit(.none, .@"test", &.{ .{ .reg = inst.data.ri.r1 }, - .{ .imm = Immediate.s(@as(i32, @bitCast(inst.data.ri.i))) }, + .{ .imm = Immediate.s(@bitCast(inst.data.ri.i)) }, }); try lower.emit(.none, .jz, &.{ .{ .imm = lower.reloc(.{ .inst = index + 1 }) }, @@ -226,14 +226,14 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { } try lower.emit(.none, .sub, &.{ .{ .reg = inst.data.ri.r1 }, - .{ .imm = Immediate.s(@as(i32, @bitCast(inst.data.ri.i))) }, + .{ .imm = Immediate.s(@bitCast(inst.data.ri.i)) }, }); assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts); }, .pseudo_probe_adjust_setup_rri_s => { try lower.emit(.none, .mov, &.{ .{ .reg = inst.data.rri.r2.to32() }, - .{ .imm = Immediate.s(@as(i32, @bitCast(inst.data.rri.i))) }, + .{ .imm = Immediate.s(@bitCast(inst.data.rri.i)) }, }); try lower.emit(.none, .sub, &.{ .{ .reg = inst.data.rri.r1 }, @@ -291,7 +291,9 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .i_s, .mi_sib_s, .mi_rip_s, - => Immediate.s(@as(i32, @bitCast(i))), + .rmi_sib_s, + .rmi_rip_s, + => Immediate.s(@bitCast(i)), .rrri, .rri_u, @@ -301,6 +303,8 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .mi_rip_u, .rmi_sib, .rmi_rip, + 
.rmi_sib_u, + .rmi_rip_u, .mri_sib, .mri_rip, .rrm_sib, @@ -319,6 +323,8 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { return lower.mir.resolveFrameLoc(switch (ops) { .rm_sib, .rmi_sib, + .rmi_sib_s, + .rmi_sib_u, .m_sib, .mi_sib_u, .mi_sib_s, @@ -335,6 +341,8 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { .rm_rip, .rmi_rip, + .rmi_rip_s, + .rmi_rip_u, .m_rip, .mi_rip_u, .mi_rip_s, @@ -383,13 +391,29 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .rrri => inst.data.rrri.fixes, .rri_s, .rri_u => inst.data.rri.fixes, .ri_s, .ri_u => inst.data.ri.fixes, - .ri64, .rm_sib, .rm_rip, .mr_sib, .mr_rip => inst.data.rx.fixes, + .ri64, + .rm_sib, + .rm_rip, + .rmi_sib_s, + .rmi_sib_u, + .rmi_rip_s, + .rmi_rip_u, + .mr_sib, + .mr_rip, + => inst.data.rx.fixes, .mrr_sib, .mrr_rip, .rrm_sib, .rrm_rip => inst.data.rrx.fixes, .rmi_sib, .rmi_rip, .mri_sib, .mri_rip => inst.data.rix.fixes, .rrmi_sib, .rrmi_rip => inst.data.rrix.fixes, .mi_sib_u, .mi_rip_u, .mi_sib_s, .mi_rip_s => inst.data.x.fixes, .m_sib, .m_rip, .rax_moffs, .moffs_rax => inst.data.x.fixes, - .extern_fn_reloc, .got_reloc, .extern_got_reloc, .direct_reloc, .direct_got_reloc, .import_reloc, .tlv_reloc => ._, + .extern_fn_reloc, + .got_reloc, + .extern_got_reloc, + .direct_reloc, + .direct_got_reloc, + .import_reloc, + .tlv_reloc, + => ._, else => return lower.fail("TODO lower .{s}", .{@tagName(inst.ops)}), }; try lower.emit(switch (fixes) { @@ -461,7 +485,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .m_sib, .m_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, }, - .mi_sib_s, .mi_sib_u, .mi_rip_u, .mi_rip_s => &.{ + .mi_sib_s, .mi_sib_u, .mi_rip_s, .mi_rip_u => &.{ .{ .mem = lower.mem(inst.ops, inst.data.x.payload + 1) }, .{ .imm = lower.imm( inst.ops, @@ -477,6 +501,14 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .{ .mem = lower.mem(inst.ops, inst.data.rix.payload) }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, + .rmi_sib_s, .rmi_sib_u, .rmi_rip_s, .rmi_rip_u => &.{ + .{ .reg = inst.data.rx.r1 }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload + 1) }, + .{ .imm = lower.imm( + inst.ops, + lower.mir.extraData(Mir.Imm32, inst.data.rx.payload).data.imm, + ) }, + }, .mr_sib, .mr_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, .{ .reg = inst.data.rx.r1 }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 23bef3c03b..3a5d5c0659 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -474,6 +474,10 @@ pub const Inst = struct { /// Bitwise logical and not of packed single-precision floating-point values /// Bitwise logical and not of packed double-precision floating-point values andn, + /// Compare packed data for equal + cmpeq, + /// Compare packed data for greater than + cmpgt, /// Maximum of packed signed integers maxs, /// Maximum of packed unsigned integers @@ -482,6 +486,10 @@ pub const Inst = struct { mins, /// Minimum of packed unsigned integers minu, + /// Move byte mask + /// Extract packed single precision floating-point sign mask + /// Extract packed double precision floating-point sign mask + movmsk, /// Multiply packed signed integers and store low result mull, /// Multiply packed signed integers and store high result @@ -720,9 +728,24 @@ pub const Inst = struct { /// Register, memory (RIP) operands. /// Uses `rx` payload. rm_rip, - /// Register, memory (SIB), immediate (byte) operands. + /// Register, memory (SIB), immediate (word) operands. 
/// Uses `rix` payload with extra data of type `MemorySib`. rmi_sib, + /// Register, memory (RIP), immediate (word) operands. + /// Uses `rix` payload with extra data of type `MemoryRip`. + rmi_rip, + /// Register, memory (SIB), immediate (signed) operands. + /// Uses `rx` payload with extra data of type `Imm32` followed by `MemorySib`. + rmi_sib_s, + /// Register, memory (SIB), immediate (unsigned) operands. + /// Uses `rx` payload with extra data of type `Imm32` followed by `MemorySib`. + rmi_sib_u, + /// Register, memory (RIP), immediate (signed) operands. + /// Uses `rx` payload with extra data of type `Imm32` followed by `MemoryRip`. + rmi_rip_s, + /// Register, memory (RIP), immediate (unsigned) operands. + /// Uses `rx` payload with extra data of type `Imm32` followed by `MemoryRip`. + rmi_rip_u, /// Register, register, memory (RIP). /// Uses `rrix` payload with extra data of type `MemoryRip`. rrm_rip, @@ -735,27 +758,24 @@ pub const Inst = struct { /// Register, register, memory (SIB), immediate (byte) operands. /// Uses `rrix` payload with extra data of type `MemorySib`. rrmi_sib, - /// Register, memory (RIP), immediate (byte) operands. - /// Uses `rix` payload with extra data of type `MemoryRip`. - rmi_rip, /// Single memory (SIB) operand. /// Uses `x` with extra data of type `MemorySib`. m_sib, /// Single memory (RIP) operand. /// Uses `x` with extra data of type `MemoryRip`. m_rip, - /// Memory (SIB), immediate (unsigned) operands. - /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`. - mi_sib_u, - /// Memory (RIP), immediate (unsigned) operands. - /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`. - mi_rip_u, /// Memory (SIB), immediate (sign-extend) operands. /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`. mi_sib_s, + /// Memory (SIB), immediate (unsigned) operands. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`. + mi_sib_u, /// Memory (RIP), immediate (sign-extend) operands. /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`. mi_rip_s, + /// Memory (RIP), immediate (unsigned) operands. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`. + mi_rip_u, /// Memory (SIB), register operands. /// Uses `rx` payload with extra data of type `MemorySib`. mr_sib, @@ -768,10 +788,10 @@ pub const Inst = struct { /// Memory (RIP), register, register operands. /// Uses `rrx` payload with extra data of type `MemoryRip`. mrr_rip, - /// Memory (SIB), register, immediate (byte) operands. + /// Memory (SIB), register, immediate (word) operands. /// Uses `rix` payload with extra data of type `MemorySib`. mri_sib, - /// Memory (RIP), register, immediate (byte) operands. + /// Memory (RIP), register, immediate (word) operands. /// Uses `rix` payload with extra data of type `MemoryRip`. mri_rip, /// Rax, Memory moffs. @@ -955,7 +975,7 @@ pub const Inst = struct { rix: struct { fixes: Fixes = ._, r1: Register, - i: u8, + i: u16, payload: u32, }, /// Register, register, byte immediate, followed by Custom payload found in extra. @@ -1010,7 +1030,7 @@ pub const RegisterList = struct { fn getIndexForReg(registers: []const Register, reg: Register) BitSet.MaskInt { for (registers, 0..) |cpreg, i| { - if (reg.id() == cpreg.id()) return @as(u32, @intCast(i)); + if (reg.id() == cpreg.id()) return @intCast(i); } unreachable; // register not in input register list! 
} @@ -1030,7 +1050,7 @@ pub const RegisterList = struct { } pub fn count(self: Self) u32 { - return @as(u32, @intCast(self.bitset.count())); + return @intCast(self.bitset.count()); } }; @@ -1044,14 +1064,14 @@ pub const Imm64 = struct { pub fn encode(v: u64) Imm64 { return .{ - .msb = @as(u32, @truncate(v >> 32)), - .lsb = @as(u32, @truncate(v)), + .msb = @truncate(v >> 32), + .lsb = @truncate(v), }; } pub fn decode(imm: Imm64) u64 { var res: u64 = 0; - res |= (@as(u64, @intCast(imm.msb)) << 32); + res |= @as(u64, @intCast(imm.msb)) << 32; res |= @as(u64, @intCast(imm.lsb)); return res; } @@ -1075,7 +1095,7 @@ pub const MemorySib = struct { assert(sib.scale_index.scale == 0 or std.math.isPowerOfTwo(sib.scale_index.scale)); return .{ .ptr_size = @intFromEnum(sib.ptr_size), - .base_tag = @intFromEnum(@as(Memory.Base.Tag, sib.base)), + .base_tag = @intFromEnum(sib.base), .base = switch (sib.base) { .none => undefined, .reg => |r| @intFromEnum(r), @@ -1091,18 +1111,18 @@ pub const MemorySib = struct { } pub fn decode(msib: MemorySib) Memory { - const scale = @as(u4, @truncate(msib.scale_index)); + const scale: u4 = @truncate(msib.scale_index); assert(scale == 0 or std.math.isPowerOfTwo(scale)); return .{ .sib = .{ - .ptr_size = @as(Memory.PtrSize, @enumFromInt(msib.ptr_size)), + .ptr_size = @enumFromInt(msib.ptr_size), .base = switch (@as(Memory.Base.Tag, @enumFromInt(msib.base_tag))) { .none => .none, - .reg => .{ .reg = @as(Register, @enumFromInt(msib.base)) }, - .frame => .{ .frame = @as(bits.FrameIndex, @enumFromInt(msib.base)) }, + .reg => .{ .reg = @enumFromInt(msib.base) }, + .frame => .{ .frame = @enumFromInt(msib.base) }, }, .scale_index = .{ .scale = scale, - .index = if (scale > 0) @as(Register, @enumFromInt(msib.scale_index >> 4)) else undefined, + .index = if (scale > 0) @enumFromInt(msib.scale_index >> 4) else undefined, }, .disp = msib.disp, } }; @@ -1124,7 +1144,7 @@ pub const MemoryRip = struct { pub fn decode(mrip: MemoryRip) Memory { return .{ .rip = .{ - .ptr_size = @as(Memory.PtrSize, @enumFromInt(mrip.ptr_size)), + .ptr_size = @enumFromInt(mrip.ptr_size), .disp = mrip.disp, } }; } @@ -1141,14 +1161,14 @@ pub const MemoryMoffs = struct { pub fn encode(seg: Register, offset: u64) MemoryMoffs { return .{ .seg = @intFromEnum(seg), - .msb = @as(u32, @truncate(offset >> 32)), - .lsb = @as(u32, @truncate(offset >> 0)), + .msb = @truncate(offset >> 32), + .lsb = @truncate(offset >> 0), }; } pub fn decode(moffs: MemoryMoffs) Memory { return .{ .moffs = .{ - .seg = @as(Register, @enumFromInt(moffs.seg)), + .seg = @enumFromInt(moffs.seg), .offset = @as(u64, moffs.msb) << 32 | @as(u64, moffs.lsb) << 0, } }; } @@ -1168,7 +1188,7 @@ pub fn extraData(mir: Mir, comptime T: type, index: u32) struct { data: T, end: inline for (fields) |field| { @field(result, field.name) = switch (field.type) { u32 => mir.extra[i], - i32 => @as(i32, @bitCast(mir.extra[i])), + i32 => @bitCast(mir.extra[i]), else => @compileError("bad field type"), }; i += 1; diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 5cffaf4fe0..695f2d585a 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -232,7 +232,7 @@ pub const Register = enum(u7) { else => unreachable, // zig fmt: on }; - return @as(u6, @intCast(@intFromEnum(reg) - base)); + return @intCast(@intFromEnum(reg) - base); } pub fn bitSize(reg: Register) u64 { @@ -291,11 +291,11 @@ pub const Register = enum(u7) { else => unreachable, // zig fmt: on }; - return @as(u4, @truncate(@intFromEnum(reg) - base)); + return 
@truncate(@intFromEnum(reg) - base); } pub fn lowEnc(reg: Register) u3 { - return @as(u3, @truncate(reg.enc())); + return @truncate(reg.enc()); } pub fn toBitSize(reg: Register, bit_size: u64) Register { @@ -325,19 +325,19 @@ pub const Register = enum(u7) { } pub fn to64(reg: Register) Register { - return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.rax))); + return @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.rax)); } pub fn to32(reg: Register) Register { - return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.eax))); + return @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.eax)); } pub fn to16(reg: Register) Register { - return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.ax))); + return @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.ax)); } pub fn to8(reg: Register) Register { - return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.al))); + return @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.al)); } fn sseBase(reg: Register) u7 { @@ -350,11 +350,11 @@ pub const Register = enum(u7) { } pub fn to256(reg: Register) Register { - return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.sseBase() + @intFromEnum(Register.ymm0))); + return @enumFromInt(@intFromEnum(reg) - reg.sseBase() + @intFromEnum(Register.ymm0)); } pub fn to128(reg: Register) Register { - return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.sseBase() + @intFromEnum(Register.xmm0))); + return @enumFromInt(@intFromEnum(reg) - reg.sseBase() + @intFromEnum(Register.xmm0)); } /// DWARF register encoding @@ -619,7 +619,7 @@ pub const Immediate = union(enum) { 1, 8 => @as(i8, @bitCast(@as(u8, @intCast(x)))), 16 => @as(i16, @bitCast(@as(u16, @intCast(x)))), 32 => @as(i32, @bitCast(@as(u32, @intCast(x)))), - 64 => @as(i64, @bitCast(x)), + 64 => @bitCast(x), else => unreachable, }, }; diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index d6efb4cfc7..af764882c8 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -905,6 +905,9 @@ pub const table = [_]Entry{ .{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse }, + .{ .movmskps, .rm, &.{ .r32, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, .sse }, + .{ .movmskps, .rm, &.{ .r64, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, .sse }, + .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse }, .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse }, @@ -917,6 +920,9 @@ pub const table = [_]Entry{ .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse }, + .{ .pmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse }, + .{ .pmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse }, + .{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse }, .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse }, @@ -1005,6 +1011,12 @@ pub const table = [_]Entry{ .{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 }, .{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 }, + .{ .movmskpd, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 }, + .{ .movmskpd, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 }, + + .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 }, + 
.{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .none, .sse2 }, + .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 }, .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 }, @@ -1037,6 +1049,14 @@ pub const table = [_]Entry{ .{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 }, + .{ .pcmpeqb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x74 }, 0, .none, .sse2 }, + .{ .pcmpeqw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x75 }, 0, .none, .sse2 }, + .{ .pcmpeqd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x76 }, 0, .none, .sse2 }, + + .{ .pcmpgtb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x64 }, 0, .none, .sse2 }, + .{ .pcmpgtw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x65 }, 0, .none, .sse2 }, + .{ .pcmpgtd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x66 }, 0, .none, .sse2 }, + .{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 }, .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, @@ -1100,9 +1120,6 @@ pub const table = [_]Entry{ .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .none, .sse2 }, - .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 }, - .{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .none, .sse2 }, - .{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .none, .sse2 }, .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .none, .sse2 }, @@ -1137,6 +1154,8 @@ pub const table = [_]Entry{ .{ .packusdw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x2b }, 0, .none, .sse4_1 }, + .{ .pcmpeqq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x29 }, 0, .none, .sse4_1 }, + .{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 }, .{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 }, .{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 }, @@ -1171,6 +1190,9 @@ pub const table = [_]Entry{ .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, + // SSE4.2 + .{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 }, + // AVX .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, @@ -1295,6 +1317,16 @@ pub const table = [_]Entry{ .{ .vmaxss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx }, + .{ .vmovmskps, .rm, &.{ .r32, .xmm }, &.{ 0x0f, 0x50 }, 0, .vex_128_wig, .avx }, + .{ .vmovmskps, .rm, &.{ .r64, .xmm }, &.{ 0x0f, 0x50 }, 0, .vex_128_wig, .avx }, + .{ .vmovmskps, .rm, &.{ .r32, .ymm }, &.{ 0x0f, 0x50 }, 0, .vex_256_wig, .avx }, + .{ .vmovmskps, .rm, &.{ .r64, .ymm }, &.{ 0x0f, 0x50 }, 0, .vex_256_wig, .avx }, + + .{ .vmovmskpd, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .vex_128_wig, .avx }, + .{ .vmovmskpd, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .vex_128_wig, .avx }, + .{ .vmovmskpd, .rm, &.{ .r32, .ymm }, &.{ 0x66, 0x0f, 0x50 }, 0, .vex_256_wig, .avx }, + .{ .vmovmskpd, .rm, &.{ .r64, .ymm }, &.{ 0x66, 0x0f, 0x50 }, 0, .vex_256_wig, .avx }, + .{ .vminpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_128_wig, .avx }, .{ .vminpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_256_wig, .avx }, @@ 
-1408,6 +1440,18 @@ pub const table = [_]Entry{ .{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx }, + .{ .vpcmpeqb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_128_wig, .avx }, + .{ .vpcmpeqw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_128_wig, .avx }, + .{ .vpcmpeqd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x76 }, 0, .vex_128_wig, .avx }, + + .{ .vpcmpeqq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x29 }, 0, .vex_128_wig, .avx }, + + .{ .vpcmpgtb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x64 }, 0, .vex_128_wig, .avx }, + .{ .vpcmpgtw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x65 }, 0, .vex_128_wig, .avx }, + .{ .vpcmpgtd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x66 }, 0, .vex_128_wig, .avx }, + + .{ .vpcmpgtq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_128_wig, .avx }, + .{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx }, .{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx }, .{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx }, @@ -1439,6 +1483,9 @@ pub const table = [_]Entry{ .{ .vpminud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_128_wig, .avx }, + .{ .vpmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx }, + .{ .vpmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx }, + .{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx }, .{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx }, @@ -1581,29 +1628,44 @@ pub const table = [_]Entry{ .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 }, - .{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx }, - .{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx }, - .{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx }, + .{ .vpcmpeqb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_256_wig, .avx2 }, + .{ .vpcmpeqw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_256_wig, .avx2 }, + .{ .vpcmpeqd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x76 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpcmpeqq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x29 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpcmpgtb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x64 }, 0, .vex_256_wig, .avx2 }, + .{ .vpcmpgtw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x65 }, 0, .vex_256_wig, .avx2 }, + .{ .vpcmpgtd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x66 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpcmpgtq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx2 }, + .{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx2 }, + .{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx2 }, + .{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 
0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx2 }, - .{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx }, - .{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx }, + .{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx2 }, - .{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx }, + .{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx2 }, + .{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx2 }, + .{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx2 }, - .{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx }, - .{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx }, - .{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx }, + .{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx2 }, + .{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx2 }, - .{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx }, - .{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx }, + .{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx2 }, - .{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx }, + .{ .vpmovmskb, .rm, &.{ .r32, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovmskb, .rm, &.{ .r64, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 }, - .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx }, + .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 }, - .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx }, + .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 }, - .{ .vpmullw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx }, + .{ .vpmullw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx2 }, .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
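
A few notes on the techniques this diff uses, with small standalone sketches; none of the code below is from the commit.

The new asmRegisterMemoryImmediate path in CodeGen.zig: the rix payload's immediate field grew from u8 to u16, so immediates that fit in 16 bits stay inline in the instruction payload, while anything wider is stored as Imm32 extra data next to the encoded memory operand (the new rmi_sib_s/rmi_sib_u/rmi_rip_s/rmi_rip_u ops). A sketch of the dispatch; Immediate and fitsInline here are illustrative stand-ins:

const std = @import("std");

const Immediate = union(enum) { signed: i64, unsigned: u64 };

// Bit pattern to store inline in `rix.i`, or null when the immediate
// must go into an `Imm32` extra-data slot instead.
fn fitsInline(imm: Immediate) ?u16 {
    return switch (imm) {
        .signed => |s| if (std.math.cast(i16, s)) |x| @as(u16, @bitCast(x)) else null,
        .unsigned => |u| std.math.cast(u16, u),
    };
}

test "inline immediate dispatch" {
    try std.testing.expectEqual(@as(?u16, 0xffff), fitsInline(.{ .signed = -1 }));
    try std.testing.expectEqual(@as(?u16, null), fitsInline(.{ .unsigned = 0x10000 }));
}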
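
The genIntMulDivOpMir change fixes the setup before unsigned division: div takes its 8-bit dividend from ax, so the high half to clear is ah; xor edx, edx only helps the 16/32/64-bit forms, whose high half lives in dx/edx/rdx. Signed division instead sign-extends the low half with one of cbw/cwd/cdq/cqo. The selection as plain Zig (sketch; mnemonics are returned as strings purely for illustration):

fn divSetup(signedness: enum { signed, unsigned }, bit_size: u16) []const u8 {
    return switch (signedness) {
        .signed => switch (bit_size) {
            8 => "cbw", // al -> ax
            16 => "cwd", // ax -> dx:ax
            32 => "cdq", // eax -> edx:eax
            64 => "cqo", // rax -> rdx:rax
            else => unreachable,
        },
        .unsigned => switch (bit_size) {
            8 => "xor ah, ah", // dividend is ax; its high half is ah
            16, 32, 64 => "xor edx, edx", // high half is dx/edx/rdx
            else => unreachable,
        },
    };
}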
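
The vector compare lowering in genBinOp gets by with the only two integer compares SSE2/AVX provide, pcmpgt (signed greater-than) and pcmpeq: cmp_lt and cmp_gte swap their operands up front (the ordered_air step), and cmp_lte, cmp_gte, and cmp_neq complement the lane mask afterwards by xoring with an all-ones vector, before a movmsk variant pulls the per-lane result into a general-purpose register. A scalar model of those identities, one lane standing in for the vector (sketch, not the commit's code):

const std = @import("std");

const CmpOp = enum { lt, lte, eq, gte, gt, neq };

// Each operator reduces to gt/eq plus an operand swap (done before
// lowering) and/or a final complement (the xor with all-ones).
fn cmp(op: CmpOp, a: i8, b: i8) bool {
    return switch (op) {
        .gt => a > b, // pcmpgt
        .eq => a == b, // pcmpeq
        .lt => b > a, // swapped pcmpgt
        .gte => !(b > a), // swapped pcmpgt, then complement
        .lte => !(a > b), // pcmpgt, then complement
        .neq => !(a == b), // pcmpeq, then complement
    };
}

test "derived operators agree with the direct ones" {
    var a: i8 = -2;
    while (a <= 2) : (a += 1) {
        var b: i8 = -2;
        while (b <= 2) : (b += 1) {
            try std.testing.expectEqual(a < b, cmp(.lt, a, b));
            try std.testing.expectEqual(a <= b, cmp(.lte, a, b));
            try std.testing.expectEqual(a >= b, cmp(.gte, a, b));
            try std.testing.expectEqual(a != b, cmp(.neq, a, b));
        }
    }
}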
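
airReduce's new path treats a vector of bool as a packed bit mask: @reduce(.Or, v) becomes a single test of the operand against the lane mask with the result read from the nz flag, and @reduce(.And, v) inverts a copy first and tests for z. A scalar model of that flag logic (sketch; names are illustrative):

const std = @import("std");

fn reduceOr(mask_bits: u64, len: u6) bool {
    const live = (@as(u64, 1) << len) - 1; // select only the vector's lanes
    return mask_bits & live != 0; // test reg, mask ; result from nz
}

fn reduceAnd(mask_bits: u64, len: u6) bool {
    const live = (@as(u64, 1) << len) - 1;
    return ~mask_bits & live == 0; // not reg ; test reg, mask ; result from z
}

test "bool-vector reductions as bit tests" {
    try std.testing.expect(reduceOr(0b0100, 4));
    try std.testing.expect(!reduceOr(0b0000, 4));
    try std.testing.expect(reduceAnd(0b1111, 4));
    try std.testing.expect(!reduceAnd(0b0111, 4));
}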
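
Finally, airMemcpy now measures in bytes rather than elements: for a slice destination the count is computed at runtime with a single imul of the stored length against the element size, and for a pointer-to-array destination the whole product folds into an immediate. The equivalent arithmetic in plain Zig (sketch):

const std = @import("std");

fn sliceByteLen(comptime Elem: type, dst: []const Elem) usize {
    // Lowered as `imul len, qword ptr [dst + 8], size-of-Elem`.
    return dst.len * @sizeOf(Elem);
}

fn arrayByteLen(comptime Elem: type, comptime n: usize) usize {
    // Known at compile time, so it becomes an immediate operand.
    return n * @sizeOf(Elem);
}

test "byte counts" {
    var buf: [4]u32 = undefined;
    try std.testing.expectEqual(@as(usize, 16), sliceByteLen(u32, &buf));
    try std.testing.expectEqual(@as(usize, 16), arrayByteLen(u32, 4));
}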
