From a4eabd39794014c871670937155c94e11fce991b Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Sat, 17 May 2025 20:30:02 -0400
Subject: x86_64: implement `vector_store_elem`

---
 src/arch/x86_64/CodeGen.zig  | 429 ++++++++++++++++++++++++++++++++++++++++++-
 test/behavior/vector.zig     |   1 -
 test/behavior/x86_64/mem.zig |  26 ++-
 3 files changed, 453 insertions(+), 3 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index be41bdeff4..66f3380fda 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -98180,6 +98180,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                         .{ ._, ._, .mov, .dst0d, .memsi(.src0d, .@"4", .src1), ._, ._ },
                     } },
                 }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
                     .dst_constraints = .{ .{ .int = .qword }, .any },
                     .patterns = &.{
                         .{ .src = .{ .to_mem, .simm32, .none } },
@@ -120534,7 +120535,421 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 try ert.die(cg);
                 try res.finish(inst, &.{}, &.{}, cg);
             },
-            .vector_store_elem => return cg.fail("TODO implement vector_store_elem", .{}),
+            .vector_store_elem => {
+                // ops[0] is the pointer to the vector, ops[1] the element index, ops[2] the new element value.
+                const extra = air_datas[@intFromEnum(inst)].vector_store_elem;
+                const bin_op = cg.air.extraData(Air.Bin, extra.payload).data;
+                var ops = try cg.tempsFromOperands(inst, .{ extra.vector_ptr, bin_op.lhs, bin_op.rhs });
+                cg.select(&.{}, &.{}, &ops, comptime &.{ .{
+                    .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+                        .{ ._, ._r, .bt, .tmp0d, .src1d, ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+                        .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ },
+                        .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ },
+                        .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
+                        .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u8, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
+                        .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
+                        .{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp0d, .src1d, ._, ._ },
+                        .{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
+                        .{ .@"0:", ._s, .bt, .tmp0d, .src1d, ._, ._ },
+                        .{ .@"1:", ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._r, .bt, .lea(.src0w), .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        // Use the word-sized form like the other `.word` cases; the vector is at most 16 bits wide.
+                        .{ ._, ._s, .bt, .lea(.src0w), .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u16, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .movzx, .tmp0d, .lea(.src0w), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ },
+                        .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ },
+                        .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
+                        .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0w), .tmp0w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
+                        .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._r, .bt, .lea(.src0w), .src1w, ._, ._ },
+                        .{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
+                        .{ .@"1:", ._s, .bt, .lea(.src0w), .src1w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .ptr_any_bool_vec, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._r, .bt, .lea(.src0d), .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .ptr_any_bool_vec, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._s, .bt, .lea(.src0d), .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .{ .ptr_bool_vec = .dword }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .lea(.src0d), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ },
+                        .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ },
+                        .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
+                        .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0d), .tmp0d, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", .cmov, null, null },
+                    .src_constraints = .{ .{ .ptr_bool_vec = .qword }, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u64, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0q, .lea(.src0q), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1q, .tmp0q, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp1q, .src1q, ._, ._ },
+                        .{ ._, ._s, .bt, .tmp0q, .src1q, ._, ._ },
+                        .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
+                        .{ ._, ._z, .cmov, .tmp0q, .tmp1q, ._, ._ },
+                        .{ ._, ._, .mov, .lea(.src0q), .tmp0q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .cmov, null, null, null },
+                    .src_constraints = .{ .ptr_any_bool_vec, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .extra_temps = .{
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .{ .type = .u32, .kind = .{ .rc = .general_purpose } },
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                        .unused,
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .tmp0d, .src1d, ._, ._ },
+                        .{ ._, ._r, .sh, .tmp0d, .ui(5), ._, ._ },
+                        .{ ._, ._, .mov, .tmp1d, .leasi(.src0d, .@"4", .tmp0), ._, ._ },
+                        .{ ._, ._, .mov, .tmp2d, .tmp1d, ._, ._ },
+                        .{ ._, ._r, .bt, .tmp2d, .src1d, ._, ._ },
+                        .{ ._, ._s, .bt, .tmp1d, .src1d, ._, ._ },
+                        .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
+                        .{ ._, ._z, .cmov, .tmp1d, .tmp2d, ._, ._ },
+                        .{ ._, ._, .mov, .leasi(.src0d, .@"4", .tmp0), .tmp1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .ptr_any_bool_vec, .any, .bool },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .clobbers = .{ .eflags = true },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
+                        .{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
+                        .{ ._, ._r, .bt, .lea(.src0d), .src1d, ._, ._ },
+                        .{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
+                        .{ .@"1:", ._s, .bt, .lea(.src0d), .src1d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any, .any, .{ .int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .simm32, .imm8 } },
+                        .{ .src = .{ .to_gpr, .simm32, .to_gpr } },
+                    },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .leaa(.src0b, .add_src0_elem_size_mul_src1), .src2b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any, .any, .{ .int = .byte } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .imm8 } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .leai(.src0b, .src1), .src2b, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any, .any, .{ .int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .simm32, .imm16 } },
+                        .{ .src = .{ .to_gpr, .simm32, .to_gpr } },
+                    },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any, .any, .{ .int = .word } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .imm16 } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .src2w, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any, .any, .{ .int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .simm32, .imm32 } },
+                        .{ .src = .{ .to_gpr, .simm32, .to_gpr } },
+                    },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2d, ._, ._ },
+                    } },
+                }, .{
+                    .src_constraints = .{ .any, .any, .{ .int = .dword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .imm32 } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .leasi(.src0d, .@"4", .src1), .src2d, ._, ._ },
+                    } },
+                }, .{
+                    // As in the other integer cases, the constraint is on the stored value (src2) and the pointer lives in a register.
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any, .any, .{ .int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .simm32, .simm32 } },
+                        .{ .src = .{ .to_gpr, .simm32, .to_gpr } },
+                    },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2q, ._, ._ },
+                    } },
+                }, .{
+                    .required_features = .{ .@"64bit", null, null, null },
+                    .src_constraints = .{ .any, .any, .{ .int = .qword } },
+                    .patterns = &.{
+                        .{ .src = .{ .to_gpr, .to_gpr, .simm32 } },
+                        .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
+                    },
+                    .each = .{ .once = &.{
+                        .{ ._, ._, .mov, .leasi(.src0q, .@"8", .src1), .src2q, ._, ._ },
+                    } },
+                } }) catch |err| switch (err) {
+                    error.SelectFailed => {
+                        // No case matched: compute the element address into ops[0]'s register, then store through it.
+                        const elem_size = cg.typeOf(extra.vector_ptr).childType(zcu).childType(zcu).abiSize(zcu);
+                        while (try ops[0].toBase(false, cg) or
+                            try ops[1].toRegClass(true, .general_purpose, cg))
+                        {}
+                        const base_reg = ops[0].tracking(cg).short.register.to64();
+                        const rhs_reg = ops[1].tracking(cg).short.register.to64();
+                        if (!std.math.isPowerOfTwo(elem_size)) {
+                            try cg.spillEflagsIfOccupied();
+                            try cg.asmRegisterRegisterImmediate(
+                                .{ .i_, .mul },
+                                rhs_reg,
+                                rhs_reg,
+                                .u(elem_size),
+                            );
+                            try cg.asmRegisterMemory(
+                                .{ ._, .lea },
+                                base_reg,
+                                try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
+                            );
+                        } else if (elem_size > 8) {
+                            try cg.spillEflagsIfOccupied();
+                            try cg.asmRegisterImmediate(
+                                .{ ._l, .sh },
+                                rhs_reg,
+                                .u(std.math.log2_int(u64, elem_size)),
+                            );
+                            try cg.asmRegisterMemory(
+                                .{ ._, .lea },
+                                base_reg,
+                                try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
+                            );
+                        } else try cg.asmRegisterMemory(
+                            .{ ._, .lea },
+                            base_reg,
+                            try ops[0].tracking(cg).short.mem(cg, .{
+                                .index = rhs_reg,
+                                .scale = .fromFactor(@intCast(elem_size)),
+                            }),
+                        );
+                        // The value to store is ops[2]; ops[1] is only the element index.
+                        try ops[0].store(&ops[2], .{}, cg);
+                    },
+                    else => |e| return e,
+                };
+                for (ops) |op| try op.die(cg);
+            },
             .c_va_arg => try cg.airVaArg(inst),
             .c_va_copy => try cg.airVaCopy(inst),
             .c_va_end => try cg.airVaEnd(inst),
@@ -144898,7 +145313,10 @@ const Select = struct {
             any_scalar_unsigned_int,
             any_float,
             po2_any,
+            bool,
             bool_vec: Memory.Size,
+            ptr_bool_vec: Memory.Size,
+            ptr_any_bool_vec,
             signed_int_vec: Memory.Size,
             signed_int_or_full_vec: Memory.Size,
             unsigned_int_vec: Memory.Size,
@@ -144974,8 +145392,17 @@ const Select = struct {
                 .any_scalar_unsigned_int => if (cg.intInfo(ty.scalarType(zcu))) |int_info| int_info.signedness == .unsigned else false,
                 .any_float => ty.isRuntimeFloat(),
                 .po2_any => std.math.isPowerOfTwo(ty.abiSize(zcu)),
+                .bool => ty.toIntern() == .bool_type,
                 .bool_vec => |size| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and
                     size.bitSize(cg.target) >= ty.vectorLen(zcu),
+                .ptr_any_bool_vec => switch (zcu.intern_pool.indexToKey(ty.childType(zcu).toIntern())) {
+                    .vector_type => |vector_type| vector_type.child == .bool_type,
+                    else => false,
+                },
+                .ptr_bool_vec => |size| switch (zcu.intern_pool.indexToKey(ty.childType(zcu).toIntern())) {
+                    .vector_type => |vector_type| vector_type.child == .bool_type and size.bitSize(cg.target) >= vector_type.len,
+                    else => false,
+                },
                 .signed_int_vec => |size| ty.isVector(zcu) and @divExact(size.bitSize(cg.target), 8) >= ty.abiSize(zcu) and
                     if (cg.intInfo(ty.childType(zcu))) |int_info| int_info.signedness == .signed else false,
                 .signed_int_or_full_vec => |size| ty.isVector(zcu) and @divExact(size.bitSize(cg.target), 8) >= ty.abiSize(zcu) and
diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig
index 497a3df310..818fbcd7e4 100644
--- a/test/behavior/vector.zig
+++ b/test/behavior/vector.zig
@@ -1391,7 +1391,6 @@ test "store packed vector element" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
diff --git a/test/behavior/x86_64/mem.zig b/test/behavior/x86_64/mem.zig
index 139e3a1471..5c6cbe0301 100644
--- a/test/behavior/x86_64/mem.zig
+++ b/test/behavior/x86_64/mem.zig
@@ -1,3 +1,7 @@
+const math = @import("math.zig");
+const imax = math.imax;
+const imin = math.imin;
+
 fn accessSlice(comptime array: anytype) !void {
     var slice: []const @typeInfo(@TypeOf(array)).array.child = undefined;
     slice = &array;
@@ -38,13 +42,33 @@ test accessSlice {
 
 fn accessVector(comptime init: anytype) !void {
     const Vector = @TypeOf(init);
+    const Elem = @typeInfo(Vector).vector.child;
+    const ct_vals: [2]Elem = switch (Elem) {
+        bool => .{ false, true },
+        else => .{ imin(Elem), imax(Elem) },
+    };
+    var rt_vals: [2]Elem = undefined;
+    rt_vals = ct_vals;
     var vector: Vector = undefined;
     vector = init;
     inline for (0..@typeInfo(Vector).vector.len) |ct_index| {
         var rt_index: usize = undefined;
         rt_index = ct_index;
         if (&vector[rt_index] != &vector[ct_index]) return error.Unexpected;
-        if (vector[rt_index] != vector[ct_index]) return error.Unexpected;
+        if (vector[rt_index] != init[ct_index]) return error.Unexpected;
+        if (vector[ct_index] != init[ct_index]) return error.Unexpected;
+        vector[rt_index] = rt_vals[0];
+        if (vector[rt_index] != ct_vals[0]) return error.Unexpected;
+        if (vector[ct_index] != ct_vals[0]) return error.Unexpected;
+        vector[rt_index] = ct_vals[1];
+        if (vector[rt_index] != ct_vals[1]) return error.Unexpected;
+        if (vector[ct_index] != ct_vals[1]) return error.Unexpected;
+        vector[ct_index] = ct_vals[0];
+        if (vector[rt_index] != ct_vals[0]) return error.Unexpected;
+        if (vector[ct_index] != ct_vals[0]) return error.Unexpected;
+        vector[ct_index] = rt_vals[1];
+        if (vector[rt_index] != ct_vals[1]) return error.Unexpected;
+        if (vector[ct_index] != ct_vals[1]) return error.Unexpected;
     }
 }
 test accessVector {
-- 
cgit v1.2.3