author    Jacob Young <jacobly0@users.noreply.github.com>  2023-10-21 19:30:45 -0400
committer Jacob Young <jacobly0@users.noreply.github.com>  2023-10-23 22:42:18 -0400
commit    fe93332ba26b0cb8ca6ecce0b2c605d49a02ca87
tree      54dc414f0ed33c259f717cdef5eeda7f136a7e81 /src
parent    794dc694b140908a9affc5b449cda09bbe971cfe
x86_64: implement enough to pass unicode tests
* implement vector comparison
* implement reduce for bool vectors
* fix `@memcpy` bug
* enable passing std tests
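In user-level Zig, the newly supported operations look like the following minimal sketch, using only language builtins; this is the kind of code the backend can now compile:

    const std = @import("std");

    test "vector compare, bool reduce, and memcpy" {
        const a: @Vector(4, u8) = .{ 1, 2, 3, 4 };
        const b: @Vector(4, u8) = .{ 4, 3, 2, 1 };

        // A vector comparison yields a @Vector(4, bool).
        const lt = a < b;

        // Reduce over the bool vector.
        try std.testing.expect(@reduce(.Or, lt));
        try std.testing.expect(!@reduce(.And, lt));

        // @memcpy with a slice destination.
        const src = [_]u16{ 1, 2, 3 };
        var dst = [_]u16{ 0, 0, 0 };
        @memcpy(dst[0..], src[0..]);
        try std.testing.expectEqualSlices(u16, &src, &dst);
    }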
Diffstat (limited to 'src')
 src/arch/x86_64/CodeGen.zig   | 407
 src/arch/x86_64/Emit.zig      |  12
 src/arch/x86_64/Encoding.zig  |  14
 src/arch/x86_64/Lower.zig     |  46
 src/arch/x86_64/Mir.zig       |  78
 src/arch/x86_64/bits.zig      |  20
 src/arch/x86_64/encodings.zig |  98
 7 files changed, 551 insertions(+), 124 deletions(-)
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 02c7aaf20f..f2626b9a9a 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -656,11 +656,14 @@ const InstTracking = struct {
fn reuse(
self: *InstTracking,
function: *Self,
- new_inst: Air.Inst.Index,
+ new_inst: ?Air.Inst.Index,
old_inst: Air.Inst.Index,
) void {
self.short = .{ .dead = function.scope_generation };
- tracking_log.debug("%{d} => {} (reuse %{d})", .{ new_inst, self.*, old_inst });
+ if (new_inst) |inst|
+ tracking_log.debug("%{d} => {} (reuse %{d})", .{ inst, self.*, old_inst })
+ else
+ tracking_log.debug("tmp => {} (reuse %{d})", .{ self.*, old_inst });
}
fn liveOut(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void {
@@ -1560,24 +1563,58 @@ fn asmRegisterMemoryImmediate(
m: Memory,
imm: Immediate,
) !void {
- _ = try self.addInst(.{
- .tag = tag[1],
- .ops = switch (m) {
- .sib => .rmi_sib,
- .rip => .rmi_rip,
+ if (switch (imm) {
+ .signed => |s| if (math.cast(i16, s)) |x| @as(u16, @bitCast(x)) else null,
+ .unsigned => |u| math.cast(u16, u),
+ }) |small_imm| {
+ _ = try self.addInst(.{
+ .tag = tag[1],
+ .ops = switch (m) {
+ .sib => .rmi_sib,
+ .rip => .rmi_rip,
+ else => unreachable,
+ },
+ .data = .{ .rix = .{
+ .fixes = tag[0],
+ .r1 = reg,
+ .i = small_imm,
+ .payload = switch (m) {
+ .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+ .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+ else => unreachable,
+ },
+ } },
+ });
+ } else {
+ const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) {
+ .signed => |s| @bitCast(s),
+ .unsigned => unreachable,
+ } });
+ assert(payload + 1 == switch (m) {
+ .sib => try self.addExtra(Mir.MemorySib.encode(m)),
+ .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
else => unreachable,
- },
- .data = .{ .rix = .{
- .fixes = tag[0],
- .r1 = reg,
- .i = @as(u8, @intCast(imm.unsigned)),
- .payload = switch (m) {
- .sib => try self.addExtra(Mir.MemorySib.encode(m)),
- .rip => try self.addExtra(Mir.MemoryRip.encode(m)),
+ });
+ _ = try self.addInst(.{
+ .tag = tag[1],
+ .ops = switch (m) {
+ .sib => switch (imm) {
+ .signed => .rmi_sib_s,
+ .unsigned => .rmi_sib_u,
+ },
+ .rip => switch (imm) {
+ .signed => .rmi_rip_s,
+ .unsigned => .rmi_rip_u,
+ },
else => unreachable,
},
- } },
- });
+ .data = .{ .rx = .{
+ .fixes = tag[0],
+ .r1 = reg,
+ .payload = payload,
+ } },
+ });
+ }
}
fn asmRegisterRegisterMemoryImmediate(
@@ -3713,14 +3750,22 @@ fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue
else => unreachable,
.mul => {},
.div => switch (tag[0]) {
- ._ => try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx),
- .i_ => switch (self.regBitSize(ty)) {
- 8 => try self.asmOpOnly(.{ ._, .cbw }),
- 16 => try self.asmOpOnly(.{ ._, .cwd }),
- 32 => try self.asmOpOnly(.{ ._, .cdq }),
- 64 => try self.asmOpOnly(.{ ._, .cqo }),
- else => unreachable,
+ ._ => {
+ const hi_reg: Register =
+ switch (self.regBitSize(ty)) {
+ 8 => .ah,
+ 16, 32, 64 => .edx,
+ else => unreachable,
+ };
+ try self.asmRegisterRegister(.{ ._, .xor }, hi_reg, hi_reg);
},
+ .i_ => try self.asmOpOnly(.{ ._, switch (self.regBitSize(ty)) {
+ 8 => .cbw,
+ 16 => .cwd,
+ 32 => .cdq,
+ 64 => .cqo,
+ else => unreachable,
+ } }),
else => unreachable,
},
}
@@ -5210,13 +5255,11 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type)
.child = (try mod.intType(.signed, scalar_bits)).ip_index,
});
- const sign_val = switch (tag) {
+ const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = switch (tag) {
.neg => try vec_ty.minInt(mod, vec_ty),
.abs => try vec_ty.maxInt(mod, vec_ty),
else => unreachable,
- };
-
- const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
+ } });
const sign_mem = if (sign_mcv.isMemory())
sign_mcv.mem(Memory.PtrSize.fromSize(abi_size))
else
@@ -5285,7 +5328,6 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type)
fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void {
const un_op = self.air.instructions.items(.data)[inst].un_op;
const ty = self.typeOf(un_op);
-
return self.floatSign(inst, un_op, ty);
}
@@ -5782,7 +5824,7 @@ fn reuseOperandAdvanced(
operand: Air.Inst.Ref,
op_index: Liveness.OperandInt,
mcv: MCValue,
- tracked_inst: Air.Inst.Index,
+ maybe_tracked_inst: ?Air.Inst.Index,
) bool {
if (!self.liveness.operandDies(inst, op_index))
return false;
@@ -5791,11 +5833,13 @@ fn reuseOperandAdvanced(
.register, .register_pair => for (mcv.getRegs()) |reg| {
// If it's in the registers table, need to associate the register(s) with the
// new instruction.
- if (!self.register_manager.isRegFree(reg)) {
- if (RegisterManager.indexOfRegIntoTracked(reg)) |index| {
- self.register_manager.registers[index] = tracked_inst;
+ if (maybe_tracked_inst) |tracked_inst| {
+ if (!self.register_manager.isRegFree(reg)) {
+ if (RegisterManager.indexOfRegIntoTracked(reg)) |index| {
+ self.register_manager.registers[index] = tracked_inst;
+ }
}
- }
+ } else self.register_manager.freeReg(reg);
},
.load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false,
else => return false,
@@ -5804,7 +5848,7 @@ fn reuseOperandAdvanced(
// Prevent the operand deaths processing code from deallocating it.
self.liveness.clearOperandDeath(inst, op_index);
const op_inst = Air.refToIndex(operand).?;
- self.getResolvedInstValue(op_inst).reuse(self, tracked_inst, op_inst);
+ self.getResolvedInstValue(op_inst).reuse(self, maybe_tracked_inst, op_inst);
return true;
}
@@ -7234,12 +7278,18 @@ fn genBinOp(
if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null;
defer if (mask_lock) |lock| self.register_manager.unlockReg(lock);
- const lhs_mcv = try self.resolveInst(lhs_air);
- const rhs_mcv = try self.resolveInst(rhs_air);
+ const ordered_air = if (lhs_ty.isVector(mod) and lhs_ty.childType(mod).isAbiInt(mod) and
+ switch (air_tag) {
+ .cmp_lt, .cmp_gte => true,
+ else => false,
+ }) .{ .lhs = rhs_air, .rhs = lhs_air } else .{ .lhs = lhs_air, .rhs = rhs_air };
+
+ const lhs_mcv = try self.resolveInst(ordered_air.lhs);
+ const rhs_mcv = try self.resolveInst(ordered_air.rhs);
switch (lhs_mcv) {
.immediate => |imm| switch (imm) {
0 => switch (air_tag) {
- .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, rhs_air),
+ .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, ordered_air.rhs),
else => {},
},
else => {},
@@ -7288,11 +7338,15 @@ fn genBinOp(
var copied_to_dst = true;
const dst_mcv: MCValue = dst: {
if (maybe_inst) |inst| {
- if ((!vec_op or lhs_mcv.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs_mcv)) {
+ const tracked_inst = switch (air_tag) {
+ else => inst,
+ .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null,
+ };
+ if ((!vec_op or lhs_mcv.isRegister()) and
+ self.reuseOperandAdvanced(inst, ordered_air.lhs, 0, lhs_mcv, tracked_inst))
break :dst lhs_mcv;
- }
if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and
- self.reuseOperand(inst, rhs_air, 1, rhs_mcv))
+ self.reuseOperandAdvanced(inst, ordered_air.rhs, 1, rhs_mcv, tracked_inst))
{
flipped = true;
break :dst rhs_mcv;
@@ -7657,7 +7711,10 @@ fn genBinOp(
.sub,
.sub_wrap,
=> if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub },
- .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
+ .bit_and => if (self.hasFeature(.avx))
+ .{ .vp_, .@"and" }
+ else
+ .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
.min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
@@ -7688,6 +7745,20 @@ fn genBinOp(
else
null,
},
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_gte,
+ .cmp_gt,
+ => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_b, .cmpgt }
+ else
+ .{ .p_b, .cmpgt },
+ .unsigned => null,
+ },
+ .cmp_eq,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else .{ .p_b, .cmpeq },
else => null,
},
17...32 => switch (air_tag) {
@@ -7708,6 +7779,17 @@ fn genBinOp(
.signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null,
.unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null,
},
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_gte,
+ .cmp_gt,
+ => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx)) .{ .vp_b, .cmpgt } else null,
+ .unsigned => null,
+ },
+ .cmp_eq,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else null,
else => null,
},
else => null,
@@ -7723,7 +7805,10 @@ fn genBinOp(
.mul,
.mul_wrap,
=> if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull },
- .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
+ .bit_and => if (self.hasFeature(.avx))
+ .{ .vp_, .@"and" }
+ else
+ .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
.min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
@@ -7746,6 +7831,20 @@ fn genBinOp(
else
.{ .p_w, .maxu },
},
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_gte,
+ .cmp_gt,
+ => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_w, .cmpgt }
+ else
+ .{ .p_w, .cmpgt },
+ .unsigned => null,
+ },
+ .cmp_eq,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else .{ .p_w, .cmpeq },
else => null,
},
9...16 => switch (air_tag) {
@@ -7769,6 +7868,17 @@ fn genBinOp(
.signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null,
.unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null,
},
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_gte,
+ .cmp_gt,
+ => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx)) .{ .vp_w, .cmpgt } else null,
+ .unsigned => null,
+ },
+ .cmp_eq,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else null,
else => null,
},
else => null,
@@ -7789,7 +7899,10 @@ fn genBinOp(
.{ .p_d, .mull }
else
null,
- .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
+ .bit_and => if (self.hasFeature(.avx))
+ .{ .vp_, .@"and" }
+ else
+ .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
.min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
@@ -7820,6 +7933,20 @@ fn genBinOp(
else
null,
},
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_gte,
+ .cmp_gt,
+ => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_d, .cmpgt }
+ else
+ .{ .p_d, .cmpgt },
+ .unsigned => null,
+ },
+ .cmp_eq,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else .{ .p_d, .cmpeq },
else => null,
},
5...8 => switch (air_tag) {
@@ -7843,6 +7970,17 @@ fn genBinOp(
.signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null,
.unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null,
},
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_gte,
+ .cmp_gt,
+ => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null,
+ .unsigned => null,
+ },
+ .cmp_eq,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null,
else => null,
},
else => null,
@@ -7855,9 +7993,33 @@ fn genBinOp(
.sub,
.sub_wrap,
=> if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub },
- .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" },
+ .bit_and => if (self.hasFeature(.avx))
+ .{ .vp_, .@"and" }
+ else
+ .{ .p_, .@"and" },
.bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" },
.xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor },
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_gte,
+ .cmp_gt,
+ => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx))
+ .{ .vp_q, .cmpgt }
+ else if (self.hasFeature(.sse4_2))
+ .{ .p_q, .cmpgt }
+ else
+ null,
+ .unsigned => null,
+ },
+ .cmp_eq,
+ .cmp_neq,
+ => if (self.hasFeature(.avx))
+ .{ .vp_q, .cmpeq }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_q, .cmpeq }
+ else
+ null,
else => null,
},
3...4 => switch (air_tag) {
@@ -7870,6 +8032,17 @@ fn genBinOp(
.bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null,
.bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null,
.xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null,
+ .cmp_eq,
+ .cmp_neq,
+ => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null,
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_gt,
+ .cmp_gte,
+ => switch (lhs_ty.childType(mod).intInfo(mod).signedness) {
+ .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null,
+ .unsigned => null,
+ },
else => null,
},
else => null,
@@ -8435,6 +8608,62 @@ fn genBinOp(
);
}
},
+ .cmp_lt,
+ .cmp_lte,
+ .cmp_eq,
+ .cmp_gte,
+ .cmp_gt,
+ .cmp_neq,
+ => {
+ switch (air_tag) {
+ .cmp_lt,
+ .cmp_eq,
+ .cmp_gt,
+ => {},
+ .cmp_lte,
+ .cmp_gte,
+ .cmp_neq,
+ => {
+ const unsigned_ty = try lhs_ty.toUnsigned(mod);
+ const not_mcv = try self.genTypedValue(.{
+ .ty = lhs_ty,
+ .val = try unsigned_ty.maxInt(mod, unsigned_ty),
+ });
+ const not_mem = if (not_mcv.isMemory())
+ not_mcv.mem(Memory.PtrSize.fromSize(abi_size))
+ else
+ Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{
+ .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()),
+ } });
+ switch (mir_tag[0]) {
+ .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory(
+ .{ .vp_, .xor },
+ dst_reg,
+ dst_reg,
+ not_mem,
+ ),
+ .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory(
+ .{ .p_, .xor },
+ dst_reg,
+ not_mem,
+ ),
+ else => unreachable,
+ }
+ },
+ else => unreachable,
+ }
+
+ const gp_reg = try self.register_manager.allocReg(maybe_inst, abi.RegisterClass.gp);
+ const gp_lock = self.register_manager.lockRegAssumeUnused(gp_reg);
+ defer self.register_manager.unlockReg(gp_lock);
+
+ try self.asmRegisterRegister(switch (mir_tag[0]) {
+ .vp_b, .vp_d, .vp_q, .vp_w => .{ .vp_b, .movmsk },
+ .p_b, .p_d, .p_q, .p_w => .{ .p_b, .movmsk },
+ else => unreachable,
+ }, gp_reg.to32(), dst_reg);
+ return .{ .register = gp_reg };
+ },
else => unreachable,
}
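The compare lowering above leans on the fact that SSE/AVX expose only two packed-integer compare primitives: "equal" (pcmpeq) and signed "greater than" (pcmpgt). cmp_lt and cmp_gte reuse pcmpgt by swapping operands up front (the ordered_air swap earlier in genBinOp); cmp_lte, cmp_gte, and cmp_neq then invert the resulting mask by XOR-ing with all-ones; and pmovmskb finally collapses the packed mask into a general-purpose register. A scalar model of the strategy (helper names are illustrative, not backend API):

    fn cmpLt(a: i32, b: i32) bool {
        return b > a; // swap operands, then the single pcmpgt primitive
    }
    fn cmpLte(a: i32, b: i32) bool {
        return !(a > b); // pcmpgt, then invert (pxor with an all-ones mask)
    }
    fn cmpNeq(a: i32, b: i32) bool {
        return !(a == b); // pcmpeq, then invert
    }

    test "cmp model" {
        try @import("std").testing.expect(cmpLt(-2, 1) and cmpLte(3, 3) and cmpNeq(0, 1));
    }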
@@ -9741,8 +9970,15 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
}
fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void {
- _ = inst;
- return self.fail("TODO implement airCmpVector for {}", .{self.target.cpu.arch});
+ const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
+ const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data;
+ const dst_mcv = try self.genBinOp(
+ inst,
+ Air.Inst.Tag.fromCmpOp(extra.compareOperator(), false),
+ extra.lhs,
+ extra.rhs,
+ );
+ return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none });
}
fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void {
@@ -12592,7 +12828,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
.{ .i_, .mul },
len_reg,
len_reg,
- Immediate.u(elem_abi_size),
+ Immediate.s(elem_abi_size),
);
try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv);
@@ -12645,8 +12881,23 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
defer if (src_ptr_lock) |lock| self.register_manager.unlockReg(lock);
const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) {
- .Slice => dst_ptr.address().offset(8).deref(),
- .One => .{ .immediate = dst_ptr_ty.childType(mod).arrayLen(mod) },
+ .Slice => len: {
+ const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
+ defer self.register_manager.unlockReg(len_lock);
+
+ try self.asmRegisterMemoryImmediate(
+ .{ .i_, .mul },
+ len_reg,
+ dst_ptr.address().offset(8).deref().mem(.qword),
+ Immediate.s(@intCast(dst_ptr_ty.childType(mod).abiSize(mod))),
+ );
+ break :len .{ .register = len_reg };
+ },
+ .One => len: {
+ const array_ty = dst_ptr_ty.childType(mod);
+ break :len .{ .immediate = array_ty.arrayLen(mod) * array_ty.childType(mod).abiSize(mod) };
+ },
.C, .Many => unreachable,
};
const len_lock: ?RegisterLock = switch (len) {
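genInlineMemcpy evidently takes a byte count, so for slice destinations the length is now computed as the run-time slice length (loaded from the second word of the slice) times the element's ABI size, in a single imul; the array case does the same multiplication at compile time. This is the `@memcpy` fix from the commit message, modeled as a function (names hypothetical):

    fn memcpyByteLen(slice_len: u64, elem_abi_size: u64) u64 {
        // emitted at run time as one `imul len_reg, qword ptr [dst + 8], imm32`
        return slice_len * elem_abi_size;
    }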
@@ -12999,10 +13250,60 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
}
fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
+ const mod = self.bin_file.options.module.?;
const reduce = self.air.instructions.items(.data)[inst].reduce;
- _ = reduce;
- return self.fail("TODO implement airReduce for x86_64", .{});
- //return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
+
+ const result: MCValue = result: {
+ const operand_ty = self.typeOf(reduce.operand);
+ if (operand_ty.isVector(mod) and operand_ty.childType(mod).toIntern() == .bool_type) {
+ try self.spillEflagsIfOccupied();
+
+ const operand_mcv = try self.resolveInst(reduce.operand);
+ const mask_len = (std.math.cast(u6, operand_ty.vectorLen(mod)) orelse
+ return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)}));
+ const mask = (@as(u64, 1) << mask_len) - 1;
+ const abi_size: u32 = @intCast(operand_ty.abiSize(mod));
+ switch (reduce.operation) {
+ .Or => {
+ if (operand_mcv.isMemory()) try self.asmMemoryImmediate(
+ .{ ._, .@"test" },
+ operand_mcv.mem(Memory.PtrSize.fromSize(abi_size)),
+ Immediate.u(mask),
+ ) else {
+ const operand_reg = registerAlias(if (operand_mcv.isRegister())
+ operand_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(operand_ty, operand_mcv), abi_size);
+ if (mask_len < abi_size * 8) try self.asmRegisterImmediate(
+ .{ ._, .@"test" },
+ operand_reg,
+ Immediate.u(mask),
+ ) else try self.asmRegisterRegister(
+ .{ ._, .@"test" },
+ operand_reg,
+ operand_reg,
+ );
+ }
+ break :result .{ .eflags = .nz };
+ },
+ .And => {
+ const tmp_reg = try self.copyToTmpRegister(operand_ty, operand_mcv);
+ const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+ defer self.register_manager.unlockReg(tmp_lock);
+
+ try self.asmRegister(.{ ._, .not }, tmp_reg);
+ if (mask_len < abi_size * 8)
+ try self.asmRegisterImmediate(.{ ._, .@"test" }, tmp_reg, Immediate.u(mask))
+ else
+ try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_reg, tmp_reg);
+ break :result .{ .eflags = .z };
+ },
+ else => return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)}),
+ }
+ }
+ return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(mod)});
+ };
+ return self.finishAir(inst, result, .{ reduce.operand, .none, .none });
}
fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
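The bool-vector reduce above never touches the vector unit: the operand already lives as packed bits in a general-purpose register (or memory), so `.Or` is a single test against a mask with one bit per element, and `.And` complements first so that any zero element survives the test. A scalar model of the two paths:

    const expect = @import("std").testing.expect;

    fn reduceOr(bits: u64, mask: u64) bool {
        return bits & mask != 0; // `test`, then read eflags as .nz
    }
    fn reduceAnd(bits: u64, mask: u64) bool {
        return ~bits & mask == 0; // `not` then `test`, then read eflags as .z
    }

    test "bool reduce model" {
        const mask = (@as(u64, 1) << 4) - 1; // 4-element vector
        try expect(reduceOr(0b0100, mask));
        try expect(!reduceAnd(0b0111, mask));
        try expect(reduceAnd(0b1111, mask));
    }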
diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig
index e03b0f01b5..ea00a0b627 100644
--- a/src/arch/x86_64/Emit.zig
+++ b/src/arch/x86_64/Emit.zig
@@ -19,18 +19,18 @@ pub const Error = Lower.Error || error{
pub fn emitMir(emit: *Emit) Error!void {
for (0..emit.lower.mir.instructions.len) |mir_i| {
- const mir_index = @as(Mir.Inst.Index, @intCast(mir_i));
+ const mir_index: Mir.Inst.Index = @intCast(mir_i);
try emit.code_offset_mapping.putNoClobber(
emit.lower.allocator,
mir_index,
- @as(u32, @intCast(emit.code.items.len)),
+ @intCast(emit.code.items.len),
);
const lowered = try emit.lower.lowerMir(mir_index);
var lowered_relocs = lowered.relocs;
for (lowered.insts, 0..) |lowered_inst, lowered_index| {
- const start_offset = @as(u32, @intCast(emit.code.items.len));
+ const start_offset: u32 = @intCast(emit.code.items.len);
try lowered_inst.encode(emit.code.writer(), .{});
- const end_offset = @as(u32, @intCast(emit.code.items.len));
+ const end_offset: u32 = @intCast(emit.code.items.len);
while (lowered_relocs.len > 0 and
lowered_relocs[0].lowered_inst_index == lowered_index) : ({
lowered_relocs = lowered_relocs[1..];
@@ -39,7 +39,7 @@ pub fn emitMir(emit: *Emit) Error!void {
.source = start_offset,
.target = target,
.offset = end_offset - 4,
- .length = @as(u5, @intCast(end_offset - start_offset)),
+ .length = @intCast(end_offset - start_offset),
}),
.linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.Elf)) |elf_file| {
// Add relocation to the decl.
@@ -220,7 +220,7 @@ const Reloc = struct {
/// Target of the relocation.
target: Mir.Inst.Index,
/// Offset of the relocation within the instruction.
- offset: usize,
+ offset: u32,
/// Length of the instruction.
length: u5,
};
diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig
index 3ef835aa18..e18c7da974 100644
--- a/src/arch/x86_64/Encoding.zig
+++ b/src/arch/x86_64/Encoding.zig
@@ -266,6 +266,8 @@ pub const Mnemonic = enum {
packssdw, packsswb, packuswb,
paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw,
pand, pandn, por, pxor,
+ pcmpeqb, pcmpeqd, pcmpeqw,
+ pcmpgtb, pcmpgtd, pcmpgtw,
pmulhw, pmullw,
psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw,
// SSE
@@ -278,11 +280,12 @@ pub const Mnemonic = enum {
maxps, maxss,
minps, minss,
movaps, movhlps, movlhps,
+ movmskps,
movss, movups,
mulps, mulss,
orps,
pextrw, pinsrw,
- pmaxsw, pmaxub, pminsw, pminub,
+ pmaxsw, pmaxub, pminsw, pminub, pmovmskb,
shufps,
sqrtps, sqrtss,
subps, subss,
@@ -301,6 +304,7 @@ pub const Mnemonic = enum {
minpd, minsd,
movapd,
movdqa, movdqu,
+ movmskpd,
//movsd,
movupd,
mulpd, mulsd,
@@ -323,11 +327,14 @@ pub const Mnemonic = enum {
extractps,
insertps,
packusdw,
+ pcmpeqq,
pextrb, pextrd, pextrq,
pinsrb, pinsrd, pinsrq,
pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw,
pmulld,
roundpd, roundps, roundsd, roundss,
+ // SSE4.2
+ pcmpgtq,
// AVX
vaddpd, vaddps, vaddsd, vaddss,
vandnpd, vandnps, vandpd, vandps,
@@ -348,6 +355,7 @@ pub const Mnemonic = enum {
vmovddup,
vmovdqa, vmovdqu,
vmovhlps, vmovlhps,
+ vmovmskpd, vmovmskps,
vmovq,
vmovsd,
vmovshdup, vmovsldup,
@@ -359,10 +367,13 @@ pub const Mnemonic = enum {
vpackssdw, vpacksswb, vpackusdw, vpackuswb,
vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw,
vpand, vpandn,
+ vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw,
+ vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw,
vpextrb, vpextrd, vpextrq, vpextrw,
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw,
+ vpmovmskb,
vpmulhw, vpmulld, vpmullw,
vpor,
vpshufhw, vpshuflw,
@@ -754,6 +765,7 @@ pub const Feature = enum {
sse2,
sse3,
sse4_1,
+ sse4_2,
ssse3,
x87,
};
diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig
index ae5f86d6b0..5ac3c3a72c 100644
--- a/src/arch/x86_64/Lower.zig
+++ b/src/arch/x86_64/Lower.zig
@@ -190,7 +190,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_probe_align_ri_s => {
try lower.emit(.none, .@"test", &.{
.{ .reg = inst.data.ri.r1 },
- .{ .imm = Immediate.s(@as(i32, @bitCast(inst.data.ri.i))) },
+ .{ .imm = Immediate.s(@bitCast(inst.data.ri.i)) },
});
try lower.emit(.none, .jz, &.{
.{ .imm = lower.reloc(.{ .inst = index + 1 }) },
@@ -226,14 +226,14 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
}
try lower.emit(.none, .sub, &.{
.{ .reg = inst.data.ri.r1 },
- .{ .imm = Immediate.s(@as(i32, @bitCast(inst.data.ri.i))) },
+ .{ .imm = Immediate.s(@bitCast(inst.data.ri.i)) },
});
assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts);
},
.pseudo_probe_adjust_setup_rri_s => {
try lower.emit(.none, .mov, &.{
.{ .reg = inst.data.rri.r2.to32() },
- .{ .imm = Immediate.s(@as(i32, @bitCast(inst.data.rri.i))) },
+ .{ .imm = Immediate.s(@bitCast(inst.data.rri.i)) },
});
try lower.emit(.none, .sub, &.{
.{ .reg = inst.data.rri.r1 },
@@ -291,7 +291,9 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
.i_s,
.mi_sib_s,
.mi_rip_s,
- => Immediate.s(@as(i32, @bitCast(i))),
+ .rmi_sib_s,
+ .rmi_rip_s,
+ => Immediate.s(@bitCast(i)),
.rrri,
.rri_u,
@@ -301,6 +303,8 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
.mi_rip_u,
.rmi_sib,
.rmi_rip,
+ .rmi_sib_u,
+ .rmi_rip_u,
.mri_sib,
.mri_rip,
.rrm_sib,
@@ -319,6 +323,8 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory {
return lower.mir.resolveFrameLoc(switch (ops) {
.rm_sib,
.rmi_sib,
+ .rmi_sib_s,
+ .rmi_sib_u,
.m_sib,
.mi_sib_u,
.mi_sib_s,
@@ -335,6 +341,8 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory {
.rm_rip,
.rmi_rip,
+ .rmi_rip_s,
+ .rmi_rip_u,
.m_rip,
.mi_rip_u,
.mi_rip_s,
@@ -383,13 +391,29 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.rrri => inst.data.rrri.fixes,
.rri_s, .rri_u => inst.data.rri.fixes,
.ri_s, .ri_u => inst.data.ri.fixes,
- .ri64, .rm_sib, .rm_rip, .mr_sib, .mr_rip => inst.data.rx.fixes,
+ .ri64,
+ .rm_sib,
+ .rm_rip,
+ .rmi_sib_s,
+ .rmi_sib_u,
+ .rmi_rip_s,
+ .rmi_rip_u,
+ .mr_sib,
+ .mr_rip,
+ => inst.data.rx.fixes,
.mrr_sib, .mrr_rip, .rrm_sib, .rrm_rip => inst.data.rrx.fixes,
.rmi_sib, .rmi_rip, .mri_sib, .mri_rip => inst.data.rix.fixes,
.rrmi_sib, .rrmi_rip => inst.data.rrix.fixes,
.mi_sib_u, .mi_rip_u, .mi_sib_s, .mi_rip_s => inst.data.x.fixes,
.m_sib, .m_rip, .rax_moffs, .moffs_rax => inst.data.x.fixes,
- .extern_fn_reloc, .got_reloc, .extern_got_reloc, .direct_reloc, .direct_got_reloc, .import_reloc, .tlv_reloc => ._,
+ .extern_fn_reloc,
+ .got_reloc,
+ .extern_got_reloc,
+ .direct_reloc,
+ .direct_got_reloc,
+ .import_reloc,
+ .tlv_reloc,
+ => ._,
else => return lower.fail("TODO lower .{s}", .{@tagName(inst.ops)}),
};
try lower.emit(switch (fixes) {
@@ -461,7 +485,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.m_sib, .m_rip => &.{
.{ .mem = lower.mem(inst.ops, inst.data.x.payload) },
},
- .mi_sib_s, .mi_sib_u, .mi_rip_u, .mi_rip_s => &.{
+ .mi_sib_s, .mi_sib_u, .mi_rip_s, .mi_rip_u => &.{
.{ .mem = lower.mem(inst.ops, inst.data.x.payload + 1) },
.{ .imm = lower.imm(
inst.ops,
@@ -477,6 +501,14 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.{ .mem = lower.mem(inst.ops, inst.data.rix.payload) },
.{ .imm = lower.imm(inst.ops, inst.data.rix.i) },
},
+ .rmi_sib_s, .rmi_sib_u, .rmi_rip_s, .rmi_rip_u => &.{
+ .{ .reg = inst.data.rx.r1 },
+ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload + 1) },
+ .{ .imm = lower.imm(
+ inst.ops,
+ lower.mir.extraData(Mir.Imm32, inst.data.rx.payload).data.imm,
+ ) },
+ },
.mr_sib, .mr_rip => &.{
.{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
.{ .reg = inst.data.rx.r1 },
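The new rmi_*_s/rmi_*_u forms carry a 32-bit immediate that no longer fits the inline rix.i field (now u16, see Mir.zig below), so the immediate is stored as an Imm32 in extra data with the memory encoding packed directly after it; Lower reads the two pieces back at payload and payload + 1, and CodeGen asserts that adjacency when emitting. A sketch of the emit side, assuming the asmRegisterMemoryImmediate context above (imm_bits stands in for the bit-cast immediate):

    const payload = try self.addExtra(Mir.Imm32{ .imm = imm_bits });
    assert(payload + 1 == try self.addExtra(Mir.MemorySib.encode(m)));
    _ = try self.addInst(.{
        .tag = tag[1],
        .ops = .rmi_sib_s,
        .data = .{ .rx = .{ .fixes = tag[0], .r1 = reg, .payload = payload } },
    });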
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index 23bef3c03b..3a5d5c0659 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -474,6 +474,10 @@ pub const Inst = struct {
/// Bitwise logical and not of packed single-precision floating-point values
/// Bitwise logical and not of packed double-precision floating-point values
andn,
+ /// Compare packed data for equal
+ cmpeq,
+ /// Compare packed data for greater than
+ cmpgt,
/// Maximum of packed signed integers
maxs,
/// Maximum of packed unsigned integers
@@ -482,6 +486,10 @@ pub const Inst = struct {
mins,
/// Minimum of packed unsigned integers
minu,
+ /// Move byte mask
+ /// Extract packed single precision floating-point sign mask
+ /// Extract packed double precision floating-point sign mask
+ movmsk,
/// Multiply packed signed integers and store low result
mull,
/// Multiply packed signed integers and store high result
@@ -720,9 +728,24 @@ pub const Inst = struct {
/// Register, memory (RIP) operands.
/// Uses `rx` payload.
rm_rip,
- /// Register, memory (SIB), immediate (byte) operands.
+ /// Register, memory (SIB), immediate (word) operands.
/// Uses `rix` payload with extra data of type `MemorySib`.
rmi_sib,
+ /// Register, memory (RIP), immediate (word) operands.
+ /// Uses `rix` payload with extra data of type `MemoryRip`.
+ rmi_rip,
+ /// Register, memory (SIB), immediate (signed) operands.
+ /// Uses `rx` payload with extra data of type `Imm32` followed by `MemorySib`.
+ rmi_sib_s,
+ /// Register, memory (SIB), immediate (unsigned) operands.
+ /// Uses `rx` payload with extra data of type `Imm32` followed by `MemorySib`.
+ rmi_sib_u,
+ /// Register, memory (RIP), immediate (signed) operands.
+ /// Uses `rx` payload with extra data of type `Imm32` followed by `MemoryRip`.
+ rmi_rip_s,
+ /// Register, memory (RIP), immediate (unsigned) operands.
+ /// Uses `rx` payload with extra data of type `Imm32` followed by `MemoryRip`.
+ rmi_rip_u,
/// Register, register, memory (RIP).
/// Uses `rrix` payload with extra data of type `MemoryRip`.
rrm_rip,
@@ -735,27 +758,24 @@ pub const Inst = struct {
/// Register, register, memory (SIB), immediate (byte) operands.
/// Uses `rrix` payload with extra data of type `MemorySib`.
rrmi_sib,
- /// Register, memory (RIP), immediate (byte) operands.
- /// Uses `rix` payload with extra data of type `MemoryRip`.
- rmi_rip,
/// Single memory (SIB) operand.
/// Uses `x` with extra data of type `MemorySib`.
m_sib,
/// Single memory (RIP) operand.
/// Uses `x` with extra data of type `MemoryRip`.
m_rip,
- /// Memory (SIB), immediate (unsigned) operands.
- /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`.
- mi_sib_u,
- /// Memory (RIP), immediate (unsigned) operands.
- /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`.
- mi_rip_u,
/// Memory (SIB), immediate (sign-extend) operands.
/// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`.
mi_sib_s,
+ /// Memory (SIB), immediate (unsigned) operands.
+ /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`.
+ mi_sib_u,
/// Memory (RIP), immediate (sign-extend) operands.
/// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`.
mi_rip_s,
+ /// Memory (RIP), immediate (unsigned) operands.
+ /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`.
+ mi_rip_u,
/// Memory (SIB), register operands.
/// Uses `rx` payload with extra data of type `MemorySib`.
mr_sib,
@@ -768,10 +788,10 @@ pub const Inst = struct {
/// Memory (RIP), register, register operands.
/// Uses `rrx` payload with extra data of type `MemoryRip`.
mrr_rip,
- /// Memory (SIB), register, immediate (byte) operands.
+ /// Memory (SIB), register, immediate (word) operands.
/// Uses `rix` payload with extra data of type `MemorySib`.
mri_sib,
- /// Memory (RIP), register, immediate (byte) operands.
+ /// Memory (RIP), register, immediate (word) operands.
/// Uses `rix` payload with extra data of type `MemoryRip`.
mri_rip,
/// Rax, Memory moffs.
@@ -955,7 +975,7 @@ pub const Inst = struct {
rix: struct {
fixes: Fixes = ._,
r1: Register,
- i: u8,
+ i: u16,
payload: u32,
},
/// Register, register, byte immediate, followed by Custom payload found in extra.
@@ -1010,7 +1030,7 @@ pub const RegisterList = struct {
fn getIndexForReg(registers: []const Register, reg: Register) BitSet.MaskInt {
for (registers, 0..) |cpreg, i| {
- if (reg.id() == cpreg.id()) return @as(u32, @intCast(i));
+ if (reg.id() == cpreg.id()) return @intCast(i);
}
unreachable; // register not in input register list!
}
@@ -1030,7 +1050,7 @@ pub const RegisterList = struct {
}
pub fn count(self: Self) u32 {
- return @as(u32, @intCast(self.bitset.count()));
+ return @intCast(self.bitset.count());
}
};
@@ -1044,14 +1064,14 @@ pub const Imm64 = struct {
pub fn encode(v: u64) Imm64 {
return .{
- .msb = @as(u32, @truncate(v >> 32)),
- .lsb = @as(u32, @truncate(v)),
+ .msb = @truncate(v >> 32),
+ .lsb = @truncate(v),
};
}
pub fn decode(imm: Imm64) u64 {
var res: u64 = 0;
- res |= (@as(u64, @intCast(imm.msb)) << 32);
+ res |= @as(u64, @intCast(imm.msb)) << 32;
res |= @as(u64, @intCast(imm.lsb));
return res;
}
@@ -1075,7 +1095,7 @@ pub const MemorySib = struct {
assert(sib.scale_index.scale == 0 or std.math.isPowerOfTwo(sib.scale_index.scale));
return .{
.ptr_size = @intFromEnum(sib.ptr_size),
- .base_tag = @intFromEnum(@as(Memory.Base.Tag, sib.base)),
+ .base_tag = @intFromEnum(sib.base),
.base = switch (sib.base) {
.none => undefined,
.reg => |r| @intFromEnum(r),
@@ -1091,18 +1111,18 @@ pub const MemorySib = struct {
}
pub fn decode(msib: MemorySib) Memory {
- const scale = @as(u4, @truncate(msib.scale_index));
+ const scale: u4 = @truncate(msib.scale_index);
assert(scale == 0 or std.math.isPowerOfTwo(scale));
return .{ .sib = .{
- .ptr_size = @as(Memory.PtrSize, @enumFromInt(msib.ptr_size)),
+ .ptr_size = @enumFromInt(msib.ptr_size),
.base = switch (@as(Memory.Base.Tag, @enumFromInt(msib.base_tag))) {
.none => .none,
- .reg => .{ .reg = @as(Register, @enumFromInt(msib.base)) },
- .frame => .{ .frame = @as(bits.FrameIndex, @enumFromInt(msib.base)) },
+ .reg => .{ .reg = @enumFromInt(msib.base) },
+ .frame => .{ .frame = @enumFromInt(msib.base) },
},
.scale_index = .{
.scale = scale,
- .index = if (scale > 0) @as(Register, @enumFromInt(msib.scale_index >> 4)) else undefined,
+ .index = if (scale > 0) @enumFromInt(msib.scale_index >> 4) else undefined,
},
.disp = msib.disp,
} };
@@ -1124,7 +1144,7 @@ pub const MemoryRip = struct {
pub fn decode(mrip: MemoryRip) Memory {
return .{ .rip = .{
- .ptr_size = @as(Memory.PtrSize, @enumFromInt(mrip.ptr_size)),
+ .ptr_size = @enumFromInt(mrip.ptr_size),
.disp = mrip.disp,
} };
}
@@ -1141,14 +1161,14 @@ pub const MemoryMoffs = struct {
pub fn encode(seg: Register, offset: u64) MemoryMoffs {
return .{
.seg = @intFromEnum(seg),
- .msb = @as(u32, @truncate(offset >> 32)),
- .lsb = @as(u32, @truncate(offset >> 0)),
+ .msb = @truncate(offset >> 32),
+ .lsb = @truncate(offset >> 0),
};
}
pub fn decode(moffs: MemoryMoffs) Memory {
return .{ .moffs = .{
- .seg = @as(Register, @enumFromInt(moffs.seg)),
+ .seg = @enumFromInt(moffs.seg),
.offset = @as(u64, moffs.msb) << 32 | @as(u64, moffs.lsb) << 0,
} };
}
@@ -1168,7 +1188,7 @@ pub fn extraData(mir: Mir, comptime T: type, index: u32) struct { data: T, end:
inline for (fields) |field| {
@field(result, field.name) = switch (field.type) {
u32 => mir.extra[i],
- i32 => @as(i32, @bitCast(mir.extra[i])),
+ i32 => @bitCast(mir.extra[i]),
else => @compileError("bad field type"),
};
i += 1;
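MemorySib.encode and MemorySib.decode are intended to round-trip: pointer size, base tag, base value, the packed scale/index nibbles, and the displacement go into fixed-width extra words and come back out unchanged. A usage sketch, assuming Memory.sib accepts the initializer shape used in CodeGen.zig above:

    const m = Memory.sib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -8 });
    const words = Mir.MemorySib.encode(m);
    const back = Mir.MemorySib.decode(words); // same ptr_size, base, and disp as `m`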
diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig
index 5cffaf4fe0..695f2d585a 100644
--- a/src/arch/x86_64/bits.zig
+++ b/src/arch/x86_64/bits.zig
@@ -232,7 +232,7 @@ pub const Register = enum(u7) {
else => unreachable,
// zig fmt: on
};
- return @as(u6, @intCast(@intFromEnum(reg) - base));
+ return @intCast(@intFromEnum(reg) - base);
}
pub fn bitSize(reg: Register) u64 {
@@ -291,11 +291,11 @@ pub const Register = enum(u7) {
else => unreachable,
// zig fmt: on
};
- return @as(u4, @truncate(@intFromEnum(reg) - base));
+ return @truncate(@intFromEnum(reg) - base);
}
pub fn lowEnc(reg: Register) u3 {
- return @as(u3, @truncate(reg.enc()));
+ return @truncate(reg.enc());
}
pub fn toBitSize(reg: Register, bit_size: u64) Register {
@@ -325,19 +325,19 @@ pub const Register = enum(u7) {
}
pub fn to64(reg: Register) Register {
- return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.rax)));
+ return @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.rax));
}
pub fn to32(reg: Register) Register {
- return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.eax)));
+ return @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.eax));
}
pub fn to16(reg: Register) Register {
- return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.ax)));
+ return @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.ax));
}
pub fn to8(reg: Register) Register {
- return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.al)));
+ return @enumFromInt(@intFromEnum(reg) - reg.gpBase() + @intFromEnum(Register.al));
}
fn sseBase(reg: Register) u7 {
@@ -350,11 +350,11 @@ pub const Register = enum(u7) {
}
pub fn to256(reg: Register) Register {
- return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.sseBase() + @intFromEnum(Register.ymm0)));
+ return @enumFromInt(@intFromEnum(reg) - reg.sseBase() + @intFromEnum(Register.ymm0));
}
pub fn to128(reg: Register) Register {
- return @as(Register, @enumFromInt(@intFromEnum(reg) - reg.sseBase() + @intFromEnum(Register.xmm0)));
+ return @enumFromInt(@intFromEnum(reg) - reg.sseBase() + @intFromEnum(Register.xmm0));
}
/// DWARF register encoding
@@ -619,7 +619,7 @@ pub const Immediate = union(enum) {
1, 8 => @as(i8, @bitCast(@as(u8, @intCast(x)))),
16 => @as(i16, @bitCast(@as(u16, @intCast(x)))),
32 => @as(i32, @bitCast(@as(u32, @intCast(x)))),
- 64 => @as(i64, @bitCast(x)),
+ 64 => @bitCast(x),
else => unreachable,
},
};
diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig
index d6efb4cfc7..af764882c8 100644
--- a/src/arch/x86_64/encodings.zig
+++ b/src/arch/x86_64/encodings.zig
@@ -905,6 +905,9 @@ pub const table = [_]Entry{
.{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse },
+ .{ .movmskps, .rm, &.{ .r32, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, .sse },
+ .{ .movmskps, .rm, &.{ .r64, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, .sse },
+
.{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse },
.{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse },
@@ -917,6 +920,9 @@ pub const table = [_]Entry{
.{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse },
+ .{ .pmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse },
+ .{ .pmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse },
+
.{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse },
.{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse },
@@ -1005,6 +1011,12 @@ pub const table = [_]Entry{
.{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 },
.{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 },
+ .{ .movmskpd, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 },
+ .{ .movmskpd, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 },
+
+ .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 },
+ .{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .none, .sse2 },
+
.{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 },
.{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 },
@@ -1037,6 +1049,14 @@ pub const table = [_]Entry{
.{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 },
+ .{ .pcmpeqb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x74 }, 0, .none, .sse2 },
+ .{ .pcmpeqw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x75 }, 0, .none, .sse2 },
+ .{ .pcmpeqd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x76 }, 0, .none, .sse2 },
+
+ .{ .pcmpgtb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x64 }, 0, .none, .sse2 },
+ .{ .pcmpgtw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x65 }, 0, .none, .sse2 },
+ .{ .pcmpgtd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x66 }, 0, .none, .sse2 },
+
.{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 },
.{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 },
@@ -1100,9 +1120,6 @@ pub const table = [_]Entry{
.{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .none, .sse2 },
- .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 },
- .{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .none, .sse2 },
-
.{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .none, .sse2 },
.{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .none, .sse2 },
@@ -1137,6 +1154,8 @@ pub const table = [_]Entry{
.{ .packusdw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x2b }, 0, .none, .sse4_1 },
+ .{ .pcmpeqq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x29 }, 0, .none, .sse4_1 },
+
.{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 },
.{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 },
.{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 },
@@ -1171,6 +1190,9 @@ pub const table = [_]Entry{
.{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 },
+ // SSE4.2
+ .{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 },
+
// AVX
.{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
.{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
@@ -1295,6 +1317,16 @@ pub const table = [_]Entry{
.{ .vmaxss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx },
+ .{ .vmovmskps, .rm, &.{ .r32, .xmm }, &.{ 0x0f, 0x50 }, 0, .vex_128_wig, .avx },
+ .{ .vmovmskps, .rm, &.{ .r64, .xmm }, &.{ 0x0f, 0x50 }, 0, .vex_128_wig, .avx },
+ .{ .vmovmskps, .rm, &.{ .r32, .ymm }, &.{ 0x0f, 0x50 }, 0, .vex_256_wig, .avx },
+ .{ .vmovmskps, .rm, &.{ .r64, .ymm }, &.{ 0x0f, 0x50 }, 0, .vex_256_wig, .avx },
+
+ .{ .vmovmskpd, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .vex_128_wig, .avx },
+ .{ .vmovmskpd, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .vex_128_wig, .avx },
+ .{ .vmovmskpd, .rm, &.{ .r32, .ymm }, &.{ 0x66, 0x0f, 0x50 }, 0, .vex_256_wig, .avx },
+ .{ .vmovmskpd, .rm, &.{ .r64, .ymm }, &.{ 0x66, 0x0f, 0x50 }, 0, .vex_256_wig, .avx },
+
.{ .vminpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_128_wig, .avx },
.{ .vminpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_256_wig, .avx },
@@ -1408,6 +1440,18 @@ pub const table = [_]Entry{
.{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx },
+ .{ .vpcmpeqb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_128_wig, .avx },
+ .{ .vpcmpeqw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_128_wig, .avx },
+ .{ .vpcmpeqd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x76 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpcmpeqq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x29 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpcmpgtb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x64 }, 0, .vex_128_wig, .avx },
+ .{ .vpcmpgtw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x65 }, 0, .vex_128_wig, .avx },
+ .{ .vpcmpgtd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x66 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpcmpgtq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_128_wig, .avx },
+
.{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx },
.{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx },
.{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx },
@@ -1439,6 +1483,9 @@ pub const table = [_]Entry{
.{ .vpminud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_128_wig, .avx },
+ .{ .vpmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx },
+
.{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
.{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
@@ -1581,29 +1628,44 @@ pub const table = [_]Entry{
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
- .{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx },
- .{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx },
- .{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx },
+ .{ .vpcmpeqb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpcmpeqw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpcmpeqd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x76 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpcmpeqq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x29 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpcmpgtb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x64 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpcmpgtw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x65 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpcmpgtd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x66 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpcmpgtq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx2 },
- .{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx },
- .{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx },
+ .{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx2 },
- .{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx },
+ .{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx2 },
+ .{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx2 },
- .{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx },
- .{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx },
- .{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx },
+ .{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx2 },
+ .{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx2 },
- .{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx },
- .{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx },
+ .{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx2 },
- .{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx },
+ .{ .vpmovmskb, .rm, &.{ .r32, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovmskb, .rm, &.{ .r64, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 },
- .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx },
+ .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 },
- .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx },
+ .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 },
- .{ .vpmullw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx },
+ .{ .vpmullw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx2 },
.{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
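Each table row pairs a mnemonic with its operand-encoding form, operand classes, opcode bytes, a ModRM opcode extension, an encoding mode, and the gating CPU feature. Reading the one new SSE4.2 row, with the field meanings inferred from the table's shape:

    .{
        .pcmpgtq, // mnemonic: compare packed signed qwords for greater than
        .rm, // operand encoding: register <- register/memory
        &.{ .xmm, .xmm_m128 }, // operand classes
        &.{ 0x66, 0x0f, 0x38, 0x37 }, // opcode bytes (66 0F 38 37 /r)
        0, // ModRM opcode extension (unused here)
        .none, // encoding mode: legacy, no VEX prefix
        .sse4_2, // required CPU feature
    },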