| author | Jacob Young <jacobly0@users.noreply.github.com> | 2025-09-11 08:40:17 -0400 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2025-09-20 18:33:01 -0700 |
| commit | 60cdacaff2bc7a4ff32cb440591ddaf4bdfdd0af (patch) | |
| tree | 84b7a3c82bfdb79d067631281a1d916fc2fa607c /src | |
| parent | 2ba03e98c88f39312ef5e1df9f14e06efd9bbcad (diff) | |
| download | zig-60cdacaff2bc7a4ff32cb440591ddaf4bdfdd0af.tar.gz, zig-60cdacaff2bc7a4ff32cb440591ddaf4bdfdd0af.zip | |
x86_64: rewrite vector element pointer access
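For context: the `.load`/`.store` changes below apply when the operand's pointer type carries a `vector_index`, i.e. the pointer addresses a single element of a vector. A minimal sketch of Zig source that produces such a pointer (the test name and values are illustrative, not taken from this commit):

```zig
const std = @import("std");

test "bool vector element pointer" {
    var v: @Vector(16, bool) = @splat(false);
    const p = &v[5]; // element pointer; its type carries vector_index = 5
    p.* = true; // store through the element pointer
    try std.testing.expect(p.*); // load through the element pointer
}
```

With this commit, bool element accesses like these are matched by `Select` patterns and lowered directly to `bt`/`bts`/`btr` sequences (see the new `.load` and `.store` cases in the diff), and the old `packedLoad`/`packedStore`/`airLoad`/`airStore` helpers are removed in favor of a generic fallback.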
Diffstat (limited to 'src')
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 673 |
1 file changed, 328 insertions, 345 deletions
```diff
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 6ff95b986a..b5760c95ac 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -2291,7 +2291,7 @@ fn genBodyBlock(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
 }
 
 fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
-    @setEvalBranchQuota(29_400);
+    @setEvalBranchQuota(29_500);
     const pt = cg.pt;
     const zcu = pt.zcu;
     const ip = &zcu.intern_pool;
@@ -86774,52 +86774,313 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 const is_non_err = try cg.tempInit(.bool, .{ .eflags = .e });
                 try is_non_err.finish(inst, &.{un_op}, &ops, cg);
             },
-            .load => fallback: {
+            .load => {
                 const ty_op = air_datas[@intFromEnum(inst)].ty_op;
                 const val_ty = ty_op.ty.toType();
-                const ptr_ty = cg.typeOf(ty_op.operand);
-                const ptr_info = ptr_ty.ptrInfo(zcu);
-                if (ptr_info.packed_offset.host_size > 0 and
-                    (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type))
-                    break :fallback try cg.airLoad(inst);
                 var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
-                const res = try ops[0].load(val_ty, .{
-                    .disp = switch (ptr_info.flags.vector_index) {
-                        .none => 0,
-                        .runtime => unreachable,
-                        else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
+                var res: [1]Temp = undefined;
+                cg.select(&res, &.{val_ty}, &ops, comptime &.{ .{
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .any, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .none, .none } },
                     },
-                }, cg);
-                try res.finish(inst, &.{ty_op.operand}, &ops, cg);
+                    .extra_temps = .{
+                        .{ .type = .u8, .kind = .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .dst_temps = .{ .{ .cc = .c }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+                        .{ ._, ._, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .any, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .none, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .c }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .ptr_any_bool_vec_elem, .any, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .none, .none } },
+                    },
+                    .dst_temps = .{ .{ .cc = .c }, .unused },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+                    } },
+                } }) catch |err| switch (err) {
+                    error.SelectFailed => res[0] = try ops[0].load(val_ty, .{
+                        .disp = switch (cg.typeOf(ty_op.operand).ptrInfo(zcu).flags.vector_index) {
+                            .none => 0,
+                            .runtime => unreachable,
+                            else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
+                        },
+                    }, cg),
+                    else => |e| return e,
+                };
+                try res[0].finish(inst, &.{ty_op.operand}, &ops, cg);
             },
             .ret => try cg.airRet(inst, false),
             .ret_safe => try cg.airRet(inst, true),
             .ret_load => try cg.airRetLoad(inst),
-            .store, .store_safe => |air_tag| fallback: {
+            .store, .store_safe => |air_tag| {
                 const bin_op = air_datas[@intFromEnum(inst)].bin_op;
-                const ptr_ty = cg.typeOf(bin_op.lhs);
-                const ptr_info = ptr_ty.ptrInfo(zcu);
-                const val_ty = cg.typeOf(bin_op.rhs);
-                if (ptr_info.packed_offset.host_size > 0 and
-                    (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type))
-                    break :fallback try cg.airStore(inst, switch (air_tag) {
-                        else => unreachable,
-                        .store => false,
-                        .store_safe => true,
-                    });
                 var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
-                try ops[0].store(&ops[1], .{
-                    .disp = switch (ptr_info.flags.vector_index) {
-                        .none => 0,
-                        .runtime => unreachable,
-                        else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
+                cg.select(&.{}, &.{}, &ops, comptime &.{ .{
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .{ .imm = 0 }, .none } },
                     },
-                    .safe = switch (air_tag) {
-                        else => unreachable,
-                        .store => false,
-                        .store_safe => true,
+                    .extra_temps = .{
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
                     },
-                }, cg);
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+                        .{ ._, ._r, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .{ .imm = 1 }, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+                        .{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp1d, .ua(.src0, .add_vector_index), ._, ._ },
+                        .{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+                        .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+                        .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .byte }, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+                        .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+                        .{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
+                        .{ .@"0:", ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+                        .{ .@"1:", ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .{ .imm = 0 }, .none } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._r, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .{ .imm = 1 }, .none } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._s, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0w), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp1d, .ua(.src0, .add_vector_index), ._, ._ },
+                        .{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index), ._, ._ },
+                        .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+                        .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0w), .tmp0w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec_elem = .word }, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+                        .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._r, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+                        .{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
+                        .{ .@"1:", ._s, .bt, .lea(.src0w), .ua(.src0, .add_vector_index), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .{ .imm = 0 }, .none } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._r, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .{ .imm = 1 }, .none } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._s, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .leaa(.src0d, .add_vector_index_div_8_down_4), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp1d, .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+                        .{ ._, ._s, .bt, .tmp0d, .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+                        .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+                        .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
+                        .{ ._, ._, .mov, .leaa(.src0d, .add_vector_index_div_8_down_4), .tmp0d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .ptr_any_bool_vec_elem, .bool, .any },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .none } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .@"test", .src1b, .si(1), ._, ._ },
+                        .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._r, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+                        .{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
+                        .{ .@"1:", ._s, .bt, .leaa(.src0d, .add_vector_index_div_8_down_4), .ua(.src0, .add_vector_index_rem_32), ._, ._ },
+                    } },
+                } }) catch |err| switch (err) {
+                    error.SelectFailed => try ops[0].store(&ops[1], .{
+                        .disp = switch (cg.typeOf(bin_op.lhs).ptrInfo(zcu).flags.vector_index) {
+                            .none => 0,
+                            .runtime => unreachable,
+                            else => |vector_index| @intCast(cg.typeOf(bin_op.rhs).abiSize(zcu) * @intFromEnum(vector_index)),
+                        },
+                        .safe = switch (air_tag) {
+                            else => unreachable,
+                            .store => false,
+                            .store_safe => true,
+                        },
+                    }, cg),
+                    else => |e| return e,
+                };
                 for (ops) |op| try op.die(cg);
             },
             .unreach => {},
@@ -100863,7 +101124,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .{ .cc = .c }, .unused },
                     .clobbers = .{ .eflags = true },
                     .each = .{ .once = &.{
-                        .{ ._, ._, .bt, .src0d, .ua(.none, .add_src1_rem_32), ._, ._ },
+                        .{ ._, ._, .bt, .src0d, .ua(.none, .add_src1), ._, ._ },
                     } },
                 }, .{
                     .src_constraints = .{ .{ .bool_vec = .dword }, .any, .any },
@@ -100884,7 +101145,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     .dst_temps = .{ .{ .cc = .c }, .unused },
                     .clobbers = .{ .eflags = true },
                    .each = .{ .once = &.{
-                        .{ ._, ._, .bt, .src0q, .ua(.none, .add_src1_rem_64), ._, ._ },
+                        .{ ._, ._, .bt, .src0q, .ua(.none, .add_src1), ._, ._ },
                     } },
                 }, .{
                     .required_features = .{ .@"64bit", null, null, null },
@@ -174481,114 +174742,6 @@ fn reuseOperandAdvanced(
     return true;
 }
 
-fn packedLoad(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
-    const pt = self.pt;
-    const zcu = pt.zcu;
-
-    const ptr_info = ptr_ty.ptrInfo(zcu);
-    const val_ty: Type = .fromInterned(ptr_info.child);
-    if (!val_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
-    const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu));
-
-    const val_bit_size: u32 = @intCast(val_ty.bitSize(zcu));
-    const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
-        .none => 0,
-        .runtime => unreachable,
-        else => |vector_index| @intFromEnum(vector_index) * val_bit_size,
-    };
-    if (ptr_bit_off % 8 == 0) {
-        {
-            const mat_ptr_mcv: MCValue = switch (ptr_mcv) {
-                .immediate, .register, .register_offset, .lea_frame => ptr_mcv,
-                else => .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) },
-            };
-            const mat_ptr_lock = switch (mat_ptr_mcv) {
-                .register => |mat_ptr_reg| self.register_manager.lockReg(mat_ptr_reg),
-                else => null,
-            };
-            defer if (mat_ptr_lock) |lock| self.register_manager.unlockReg(lock);
-
-            try self.load(dst_mcv, ptr_ty, mat_ptr_mcv.offset(@intCast(@divExact(ptr_bit_off, 8))));
-        }
-
-        if (val_abi_size * 8 > val_bit_size) {
-            if (dst_mcv.isRegister()) {
-                try self.truncateRegister(val_ty, dst_mcv.getReg().?);
-            } else {
-                const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
-                const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
-                defer self.register_manager.unlockReg(tmp_lock);
-
-                const hi_mcv = dst_mcv.address().offset(@intCast(val_bit_size / 64 * 8)).deref();
-                try self.genSetReg(tmp_reg, .usize, hi_mcv, .{});
-                try self.truncateRegister(val_ty, tmp_reg);
-                try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{});
-            }
-        }
-        return;
-    }
-
-    if (val_abi_size > 8) return self.fail("TODO implement packed load of {f}", .{val_ty.fmt(pt)});
-
-    const limb_abi_size: u31 = @min(val_abi_size, 8);
-    const limb_abi_bits = limb_abi_size * 8;
-    const val_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
-    const val_bit_off = ptr_bit_off % limb_abi_bits;
-    const val_extra_bits = self.regExtraBits(val_ty);
-
-    const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
-    const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
-    defer self.register_manager.unlockReg(ptr_lock);
-
-    const dst_reg = switch (dst_mcv) {
-        .register => |reg| reg,
-        else => try self.register_manager.allocReg(null, abi.RegisterClass.gp),
-    };
-    const dst_lock = self.register_manager.lockReg(dst_reg);
-    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-
-    const load_abi_size =
-        if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
-    if (load_abi_size <= 8) {
-        const load_reg = registerAlias(dst_reg, load_abi_size);
-        try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{
-            .base = .{ .reg = ptr_reg },
-            .mod = .{ .rm = .{
-                .size = .fromSize(load_abi_size),
-                .disp = val_byte_off,
-            } },
-        });
-        try self.spillEflagsIfOccupied();
-        try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, .u(val_bit_off));
-    } else {
-        const tmp_reg =
-            registerAlias(try self.register_manager.allocReg(null, abi.RegisterClass.gp), val_abi_size);
-        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
-        defer self.register_manager.unlockReg(tmp_lock);
-
-        const dst_alias = registerAlias(dst_reg, val_abi_size);
-        try self.asmRegisterMemory(.{ ._, .mov }, dst_alias, .{
-            .base = .{ .reg = ptr_reg },
-            .mod = .{ .rm = .{
-                .size = .fromSize(val_abi_size),
-                .disp = val_byte_off,
-            } },
-        });
-        try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{
-            .base = .{ .reg = ptr_reg },
-            .mod = .{ .rm = .{
-                .size = .fromSize(val_abi_size),
-                .disp = val_byte_off + limb_abi_size,
-            } },
-        });
-        try self.spillEflagsIfOccupied();
-        try self.asmRegisterRegisterImmediate(.{ ._rd, .sh }, dst_alias, tmp_reg, .u(val_bit_off));
-    }
-
-    if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
-    try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg }, .{});
-}
-
 fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
     const pt = self.pt;
     const zcu = pt.zcu;
@@ -174636,174 +174789,6 @@ fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerE
     }
 }
 
-fn airLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
-    const pt = self.pt;
-    const zcu = pt.zcu;
-    const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
-    const elem_ty = self.typeOfIndex(inst);
-    const result: MCValue = result: {
-        if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none;
-
-        try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
-        const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
-        defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
-
-        const ptr_ty = self.typeOf(ty_op.operand);
-        const elem_size = elem_ty.abiSize(zcu);
-
-        const elem_rs = self.regSetForType(elem_ty);
-        const ptr_rs = self.regSetForType(ptr_ty);
-
-        const ptr_mcv = try self.resolveInst(ty_op.operand);
-        const dst_mcv = if (elem_size <= 8 and std.math.isPowerOfTwo(elem_size) and
-            elem_rs.supersetOf(ptr_rs) and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
-            // The MCValue that holds the pointer can be re-used as the value.
-            ptr_mcv
-        else
-            try self.allocRegOrMem(inst, true);
-
-        const ptr_info = ptr_ty.ptrInfo(zcu);
-        if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) {
-            try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
-        } else {
-            try self.load(dst_mcv, ptr_ty, ptr_mcv);
-        }
-
-        if (elem_ty.isAbiInt(zcu) and elem_size * 8 > elem_ty.bitSize(zcu)) {
-            const high_mcv: MCValue = switch (dst_mcv) {
-                .register => |dst_reg| .{ .register = dst_reg },
-                .register_pair => |dst_regs| .{ .register = dst_regs[1] },
-                else => dst_mcv.address().offset(@intCast((elem_size - 1) / 8 * 8)).deref(),
-            };
-            const high_reg = if (high_mcv.isRegister())
-                high_mcv.getReg().?
-            else
-                try self.copyToTmpRegister(.usize, high_mcv);
-            const high_lock = self.register_manager.lockReg(high_reg);
-            defer if (high_lock) |lock| self.register_manager.unlockReg(lock);
-
-            try self.truncateRegister(elem_ty, high_reg);
-            if (!high_mcv.isRegister()) try self.genCopy(
-                if (elem_size <= 8) elem_ty else .usize,
-                high_mcv,
-                .{ .register = high_reg },
-                .{},
-            );
-        }
-        break :result dst_mcv;
-    };
-    return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
-}
-
-fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
-    const pt = self.pt;
-    const zcu = pt.zcu;
-    const ptr_info = ptr_ty.ptrInfo(zcu);
-    const src_ty: Type = .fromInterned(ptr_info.child);
-    if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) return;
-
-    const limb_abi_size: u16 = @min(ptr_info.packed_offset.host_size, 8);
-    const limb_abi_bits = limb_abi_size * 8;
-    const limb_ty = try pt.intType(.unsigned, limb_abi_bits);
-
-    const src_bit_size = src_ty.bitSize(zcu);
-    const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
-        .none => 0,
-        .runtime => unreachable,
-        else => |vector_index| @intFromEnum(vector_index) * src_bit_size,
-    };
-    const src_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size);
-    const src_bit_off = ptr_bit_off % limb_abi_bits;
-
-    const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
-    const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
-    defer self.register_manager.unlockReg(ptr_lock);
-
-    const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) {
-        .register => if (src_bit_size > 64) {
-            const frame_index = try self.allocFrameIndex(.initSpill(src_ty, self.pt.zcu));
-            try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{});
-            break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } };
-        } else src_mcv,
-        else => src_mcv,
-    };
-
-    var limb_i: u16 = 0;
-    while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
-        const part_bit_off = if (limb_i == 0) src_bit_off else 0;
-        const part_bit_size =
-            @min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
-        const limb_mem: Memory = .{
-            .base = .{ .reg = ptr_reg },
-            .mod = .{ .rm = .{
-                .size = .fromSize(limb_abi_size),
-                .disp = src_byte_off + limb_i * limb_abi_size,
-            } },
-        };
-
-        const part_mask = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - part_bit_size)) <<
-            @intCast(part_bit_off);
-        const part_mask_not = part_mask ^ (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_bits));
-        if (limb_abi_size <= 4) {
-            try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .u(part_mask_not));
-        } else if (std.math.cast(i32, @as(i64, @bitCast(part_mask_not)))) |small| {
-            try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .s(small));
-        } else {
-            const part_mask_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
-            try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, .u(part_mask_not));
-            try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg);
-        }
-
-        if (src_bit_size <= 64) {
-            const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
-            const tmp_mcv = MCValue{ .register = tmp_reg };
-            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
-            defer self.register_manager.unlockReg(tmp_lock);
-
-            try self.genSetReg(tmp_reg, limb_ty, mat_src_mcv, .{});
-            switch (limb_i) {
-                0 => try self.genShiftBinOpMir(
-                    .{ ._l, .sh },
-                    limb_ty,
-                    tmp_mcv,
-                    .u8,
-                    .{ .immediate = src_bit_off },
-                ),
-                1 => try self.genShiftBinOpMir(
-                    .{ ._r, .sh },
-                    limb_ty,
-                    tmp_mcv,
-                    .u8,
-                    .{ .immediate = limb_abi_bits - src_bit_off },
-                ),
-                else => unreachable,
-            }
-            try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
-            try self.asmMemoryRegister(
-                .{ ._, .@"or" },
-                limb_mem,
-                registerAlias(tmp_reg, limb_abi_size),
-            );
-        } else if (src_bit_size <= 128 and src_bit_off == 0) {
-            const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
-            const tmp_mcv = MCValue{ .register = tmp_reg };
-            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
-            defer self.register_manager.unlockReg(tmp_lock);
-
-            try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) {
-                0 => mat_src_mcv,
-                else => mat_src_mcv.address().offset(limb_i * limb_abi_size).deref(),
-            }, .{});
-            try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask });
-            try self.asmMemoryRegister(
-                .{ ._, .@"or" },
-                limb_mem,
-                registerAlias(tmp_reg, limb_abi_size),
-            );
-        } else return self.fail("TODO: implement packed store of {f}", .{src_ty.fmt(pt)});
-    }
-}
-
 fn store(
     self: *CodeGen,
     ptr_ty: Type,
@@ -174857,35 +174842,6 @@ fn store(
     }
 }
 
-fn airStore(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
-    const pt = self.pt;
-    const zcu = pt.zcu;
-    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-
-    result: {
-        if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result;
-
-        try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
-        const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
-        defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
-
-        const ptr_ty = self.typeOf(bin_op.lhs);
-        const ptr_info = ptr_ty.ptrInfo(zcu);
-        const is_packed = ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0;
-        if (is_packed) try self.spillEflagsIfOccupied();
-
-        const src_mcv = try self.resolveInst(bin_op.rhs);
-        const ptr_mcv = try self.resolveInst(bin_op.lhs);
-
-        if (is_packed) {
-            try self.packedStore(ptr_ty, ptr_mcv, src_mcv);
-        } else {
-            try self.store(ptr_ty, ptr_mcv, src_mcv, .{ .safety = safety });
-        }
-    }
-    return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
-}
-
 fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue {
     const pt = self.pt;
     const zcu = pt.zcu;
@@ -192171,6 +192127,8 @@ const Select = struct {
             exact_bool_vec: u16,
             ptr_any_bool_vec,
             ptr_bool_vec: Memory.Size,
+            ptr_any_bool_vec_elem,
+            ptr_bool_vec_elem: Memory.Size,
             remainder_bool_vec: OfIsSizes,
             exact_remainder_bool_vec: struct { of: Memory.Size, is: u16 },
             signed_int_vec: Memory.Size,
@@ -192273,6 +192231,22 @@ const Select = struct {
                 .vector_type => |vector_type| vector_type.child == .bool_type and size.bitSize(cg.target) >= vector_type.len,
                 else => false,
             },
+            .ptr_any_bool_vec_elem => {
+                const ptr_info = ty.ptrInfo(zcu);
+                return switch (ptr_info.flags.vector_index) {
+                    .none => false,
+                    .runtime => unreachable,
+                    else => ptr_info.child == .bool_type,
+                };
+            },
+            .ptr_bool_vec_elem => |size| {
+                const ptr_info = ty.ptrInfo(zcu);
+                return switch (ptr_info.flags.vector_index) {
+                    .none => false,
+                    .runtime => unreachable,
+                    else => ptr_info.child == .bool_type and size.bitSize(cg.target) >= ptr_info.packed_offset.host_size,
+                };
+            },
             .remainder_bool_vec => |of_is| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
                 of_is.is.bitSize(cg.target) >= (ty.vectorLen(zcu) - 1) % of_is.of.bitSize(cg.target) + 1,
             .exact_remainder_bool_vec => |of_is| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
@@ -193266,7 +193240,7 @@ const Select = struct {
                     ref: Ref,
                     scale: Memory.Scale = .@"1",
                 } = .{ .ref = .none },
-                unused: u3 = 0,
+                unused: u2 = 0,
             },
             imm: i32 = 0,
 
@@ -193279,9 +193253,9 @@ const Select = struct {
                 lea,
                 mem,
             };
-            const Adjust = packed struct(u10) {
+            const Adjust = packed struct(u11) {
                 sign: enum(u1) { neg, pos },
-                lhs: enum(u5) {
+                lhs: enum(u6) {
                     none,
                     ptr_size,
                     ptr_bit_size,
@@ -193303,6 +193277,7 @@ const Select = struct {
                    src0_elem_size,
                    dst0_elem_size,
                    src0_elem_size_mul_src1,
+                    vector_index,
                    src1,
                    src1_sub_bit_size,
                    log2_src0_elem_size,
@@ -193373,9 +193348,13 @@ const Select = struct {
             const sub_src0_elem_size: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"1" };
             const add_src0_elem_size_mul_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
             const sub_src0_elem_size_mul_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" };
+            const add_vector_index: Adjust = .{ .sign = .pos, .lhs = .vector_index, .op = .mul, .rhs = .@"1" };
+            const add_vector_index_rem_32: Adjust = .{ .sign = .pos, .lhs = .vector_index, .op = .rem_8_mul, .rhs = .@"4" };
+            const add_vector_index_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .vector_index, .op = .div_8_down, .rhs = .@"4" };
             const add_dst0_elem_size: Adjust = .{ .sign = .pos, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" };
             const sub_dst0_elem_size: Adjust = .{ .sign = .neg, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" };
             const add_src1_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .div_8_down, .rhs = .@"4" };
+            const add_src1: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .mul, .rhs = .@"1" };
             const add_src1_rem_32: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"4" };
             const add_src1_rem_64: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"8" };
             const add_src1_sub_bit_size: Adjust = .{ .sign = .pos, .lhs = .src1_sub_bit_size, .op = .mul, .rhs = .@"1" };
@@ -194258,6 +194237,10 @@ const Select = struct {
                 .dst0_elem_size => @intCast(Select.Operand.Ref.dst0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)),
                 .src0_elem_size_mul_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
                     Select.Operand.Ref.src1.valueOf(s).immediate),
+                .vector_index => switch (op.flags.base.ref.typeOf(s).ptrInfo(s.cg.pt.zcu).flags.vector_index) {
+                    .none, .runtime => unreachable,
+                    else => |vector_index| @intFromEnum(vector_index),
+                },
                 .src1 => @intCast(Select.Operand.Ref.src1.valueOf(s).immediate),
                 .src1_sub_bit_size => @as(SignedImm, @intCast(Select.Operand.Ref.src1.valueOf(s).immediate)) -
                     @as(SignedImm, @intCast(s.cg.nonBoolScalarBitSize(op.flags.base.ref.typeOf(s)))),
```
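The new `Adjust` constants encode the displacement math used by the dword-granular patterns above. A sketch of what `add_vector_index_div_8_down_4` and `add_vector_index_rem_32` appear to compute, assuming `div_8_down` with `.@"4"` means "divide by 8, round down to a multiple of 4" and `rem_8_mul` with `.@"4"` means "remainder modulo 8 * 4" (consistent with the existing `add_src1_rem_32`/`add_src1_rem_64` definitions); the helper names here are hypothetical:

```zig
const std = @import("std");

// Byte offset of the aligned dword that contains bit `vector_index`
// of the packed bool vector (add_vector_index_div_8_down_4).
fn dwordDisp(vector_index: u32) u32 {
    return (vector_index / 8) & ~@as(u32, 3);
}

// Bit position of the element within that dword (add_vector_index_rem_32).
fn bitWithinDword(vector_index: u32) u32 {
    return vector_index % 32;
}

test "dword-granular bit addressing" {
    // Element 37 lives in the dword at byte offset 4, bit 5: 4 * 8 + 5 == 37.
    try std.testing.expectEqual(@as(u32, 4), dwordDisp(37));
    try std.testing.expectEqual(@as(u32, 5), bitWithinDword(37));
}
```

This split is what lets a single `bt`/`bts`/`btr` with a memory operand and an immediate bit index reach any element of a bool vector wider than a register, without materializing the whole vector first.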
