Merge pull request #24011 from jacobly0/legalize-unary

Legalize: implement scalarization and safety check expansion
author: Jacob Young <15544577+jacobly0@users.noreply.github.com> 2025-06-01 22:02:34 -0400
committer: GitHub <noreply@github.com> 2025-06-01 22:02:34 -0400
commit: 8dbd29cc4588cf118532a816d74b78f62999b636 (patch)
tree: 0fc19e694d1ad7366dd2b7153dd0d4647d3a9159 /src/codegen
parent: 0386730777da858908aaba4ef96fb5bd48faafc9 (diff)
parent: 6a63c8653ae9121f1cbcee49d32ec4f8deaf0b65 (diff)
download: zig-8dbd29cc4588cf118532a816d74b78f62999b636.tar.gz
zig-8dbd29cc4588cf118532a816d74b78f62999b636.zip
4 files changed, 374 insertions, 107 deletions
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index 8539efdbfe..c68abc06ce 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -4,6 +4,7 @@ const assert = std.debug.assert;
 const mem = std.mem;
 const log = std.log.scoped(.c);
 
+const dev = @import("../dev.zig");
 const link = @import("../link.zig");
 const Zcu = @import("../Zcu.zig");
 const Module = @import("../Package/Module.zig");
@@ -20,6 +21,15 @@ const Alignment = InternPool.Alignment;
 const BigIntLimb = std.math.big.Limb;
 const BigInt = std.math.big.int;
 
+pub fn legalizeFeatures(_: *const std.Target) ?*const Air.Legalize.Features { // the target argument is ignored by this backend
+    return if (dev.env.supports(.legalize)) comptime &.initMany(&.{ // feature set is built at comptime; a pointer to that constant is returned
+        .expand_intcast_safe, // NOTE(review): presumably requests expansion of the safety-checked intcast before codegen - confirm in Air.Legalize
+        .expand_add_safe, // likewise for safety-checked add (assumption, see above)
+        .expand_sub_safe, // likewise for safety-checked sub (assumption, see above)
+        .expand_mul_safe, // likewise for safety-checked mul (assumption, see above)
+    }) else null; // null when the dev env lacks `.legalize`: we don't currently ask zig1 to use safe optimization modes
+}
+
 pub const CType = @import("c/Type.zig");
 
 pub const CValue = union(enum) {
@@ -206,7 +216,6 @@ const reserved_idents = std.StaticStringMap(void).initComptime(.{
     .{ "atomic_ushort", {} },
     .{ "atomic_wchar_t", {} },
     .{ "auto", {} },
-    .{ "bool", {} },
     .{ "break", {} },
     .{ "case", {} },
     .{ "char", {} },
@@ -266,6 +275,11 @@ const reserved_idents = std.StaticStringMap(void).initComptime(.{
     .{ "va_end", {} },
     .{ "va_copy", {} },
 
+    // stdbool.h
+    .{ "bool", {} },
+    .{ "false", {} },
+    .{ "true", {} },
+
     // stddef.h
     .{ "offsetof", {} },
 
@@ -1591,7 +1605,7 @@ pub const DeclGen = struct {
                         try writer.writeAll("((");
                         try dg.renderCType(writer, ctype);
                         return writer.print("){x})", .{
-                            try dg.fmtIntLiteral(try pt.undefValue(.usize), .Other),
+                            try dg.fmtIntLiteral(.undef_usize, .Other),
                         });
                     },
                     .slice => {
@@ -1605,7 +1619,7 @@ pub const DeclGen = struct {
                         const ptr_ty = ty.slicePtrFieldType(zcu);
                         try dg.renderType(writer, ptr_ty);
                         return writer.print("){x}, {0x}}}", .{
-                            try dg.fmtIntLiteral(try dg.pt.undefValue(.usize), .Other),
+                            try dg.fmtIntLiteral(.undef_usize, .Other),
                         });
                     },
                 },
@@ -3360,7 +3374,8 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
             .error_name       => try airErrorName(f, inst),
             .splat            => try airSplat(f, inst),
             .select           => try airSelect(f, inst),
-            .shuffle          => try airShuffle(f, inst),
+            .shuffle_one      => try airShuffleOne(f, inst),
+            .shuffle_two      => try airShuffleTwo(f, inst),
             .reduce           => try airReduce(f, inst),
             .aggregate_init   => try airAggregateInit(f, inst),
             .union_init       => try airUnionInit(f, inst),
@@ -4179,7 +4194,7 @@ fn airOverflow(f: *Function, inst: Air.Inst.Index, operation: []const u8, info:
     try v.elem(f, w);
     try w.writeAll(", ");
     try f.writeCValue(w, rhs, .FunctionArgument);
-    try v.elem(f, w);
+    if (f.typeOf(bin_op.rhs).isVector(zcu)) try v.elem(f, w);
     try f.object.dg.renderBuiltinInfo(w, scalar_ty, info);
     try w.writeAll(");\n");
     try v.end(f, inst, w);
@@ -6376,7 +6391,7 @@ fn airArrayToSlice(f: *Function, inst: Air.Inst.Index) !CValue {
             if (operand_child_ctype.info(ctype_pool) == .array) {
                 try writer.writeByte('&');
                 try f.writeCValueDeref(writer, operand);
-                try writer.print("[{}]", .{try f.fmtIntLiteral(try pt.intValue(.usize, 0))});
+                try writer.print("[{}]", .{try f.fmtIntLiteral(.zero_usize)});
             } else try f.writeCValue(writer, operand, .Other);
         }
         try a.end(f, writer);
@@ -6536,7 +6551,7 @@ fn airBinBuiltinCall(
     try v.elem(f, writer);
     try writer.writeAll(", ");
     try f.writeCValue(writer, rhs, .FunctionArgument);
-    try v.elem(f, writer);
+    if (f.typeOf(bin_op.rhs).isVector(zcu)) try v.elem(f, writer);
     try f.object.dg.renderBuiltinInfo(writer, scalar_ty, info);
     try writer.writeAll(");\n");
     try v.end(f, inst, writer);
@@ -6907,7 +6922,7 @@ fn airMemset(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
         try writer.writeAll("for (");
         try f.writeCValue(writer, index, .Other);
         try writer.writeAll(" = ");
-        try f.object.dg.renderValue(writer, try pt.intValue(.usize, 0), .Other);
+        try f.object.dg.renderValue(writer, .zero_usize, .Other);
         try writer.writeAll("; ");
         try f.writeCValue(writer, index, .Other);
         try writer.writeAll(" != ");
@@ -7149,34 +7164,73 @@ fn airSelect(f: *Function, inst: Air.Inst.Index) !CValue {
     return local;
 }
 
-fn airShuffle(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airShuffleOne(f: *Function, inst: Air.Inst.Index) !CValue {
     const pt = f.object.dg.pt;
     const zcu = pt.zcu;
-    const ty_pl = f.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
-    const extra = f.air.extraData(Air.Shuffle, ty_pl.payload).data;
 
-    const mask = Value.fromInterned(extra.mask);
-    const lhs = try f.resolveInst(extra.a);
-    const rhs = try f.resolveInst(extra.b);
-
-    const inst_ty = f.typeOfIndex(inst);
+    const unwrapped = f.air.unwrapShuffleOne(zcu, inst);
+    const mask = unwrapped.mask;
+    const operand = try f.resolveInst(unwrapped.operand);
+    const inst_ty = unwrapped.result_ty;
 
     const writer = f.object.writer();
     const local = try f.allocLocal(inst, inst_ty);
-    try reap(f, inst, &.{ extra.a, extra.b }); // local cannot alias operands
-    for (0..extra.mask_len) |index| {
+    try reap(f, inst, &.{unwrapped.operand}); // local cannot alias operand
+    for (mask, 0..) |mask_elem, out_idx| {
         try f.writeCValue(writer, local, .Other);
         try writer.writeByte('[');
-        try f.object.dg.renderValue(writer, try pt.intValue(.usize, index), .Other);
+        try f.object.dg.renderValue(writer, try pt.intValue(.usize, out_idx), .Other);
         try writer.writeAll("] = ");
+        switch (mask_elem.unwrap()) {
+            .elem => |src_idx| {
+                try f.writeCValue(writer, operand, .Other);
+                try writer.writeByte('[');
+                try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other);
+                try writer.writeByte(']');
+            },
+            .value => |val| try f.object.dg.renderValue(writer, .fromInterned(val), .Other),
+        }
+        try writer.writeAll(";\n");
+    }
 
-        const mask_elem = (try mask.elemValue(pt, index)).toSignedInt(zcu);
-        const src_val = try pt.intValue(.usize, @as(u64, @intCast(mask_elem ^ mask_elem >> 63)));
+    return local;
+}
+
+fn airShuffleTwo(f: *Function, inst: Air.Inst.Index) !CValue {
+    const pt = f.object.dg.pt;
+    const zcu = pt.zcu;
+
+    const unwrapped = f.air.unwrapShuffleTwo(zcu, inst);
+    const mask = unwrapped.mask;
+    const operand_a = try f.resolveInst(unwrapped.operand_a);
+    const operand_b = try f.resolveInst(unwrapped.operand_b);
+    const inst_ty = unwrapped.result_ty;
+    const elem_ty = inst_ty.childType(zcu);
 
-        try f.writeCValue(writer, if (mask_elem >= 0) lhs else rhs, .Other);
+    const writer = f.object.writer();
+    const local = try f.allocLocal(inst, inst_ty);
+    try reap(f, inst, &.{ unwrapped.operand_a, unwrapped.operand_b }); // local cannot alias operands
+    for (mask, 0..) |mask_elem, out_idx| {
+        try f.writeCValue(writer, local, .Other);
         try writer.writeByte('[');
-        try f.object.dg.renderValue(writer, src_val, .Other);
-        try writer.writeAll("];\n");
+        try f.object.dg.renderValue(writer, try pt.intValue(.usize, out_idx), .Other);
+        try writer.writeAll("] = ");
+        switch (mask_elem.unwrap()) {
+            .a_elem => |src_idx| {
+                try f.writeCValue(writer, operand_a, .Other);
+                try writer.writeByte('[');
+                try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other);
+                try writer.writeByte(']');
+            },
+            .b_elem => |src_idx| {
+                try f.writeCValue(writer, operand_b, .Other);
+                try writer.writeByte('[');
+                try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other);
+                try writer.writeByte(']');
+            },
+            .undef => try f.object.dg.renderUndefValue(writer, elem_ty, .Other),
+        }
+        try writer.writeAll(";\n");
     }
 
     return local;
@@ -8311,11 +8365,11 @@ const Vectorize = struct {
 
             try writer.writeAll("for (");
             try f.writeCValue(writer, local, .Other);
-            try writer.print(" = {d}; ", .{try f.fmtIntLiteral(try pt.intValue(.usize, 0))});
+            try writer.print(" = {d}; ", .{try f.fmtIntLiteral(.zero_usize)});
             try f.writeCValue(writer, local, .Other);
             try writer.print(" < {d}; ", .{try f.fmtIntLiteral(try pt.intValue(.usize, ty.vectorLen(zcu)))});
             try f.writeCValue(writer, local, .Other);
-            try writer.print(" += {d}) {{\n", .{try f.fmtIntLiteral(try pt.intValue(.usize, 1))});
+            try writer.print(" += {d}) {{\n", .{try f.fmtIntLiteral(.one_usize)});
             f.object.indent_writer.pushIndent();
 
             break :index .{ .index = local };
diff --git a/src/codegen/c/Type.zig b/src/codegen/c/Type.zig
index 7d3a485e2a..e5901ec626 100644
--- a/src/codegen/c/Type.zig
+++ b/src/codegen/c/Type.zig
@@ -1408,6 +1408,15 @@ pub const Pool = struct {
                 .bits = pt.zcu.errorSetBits(),
             }, mod, kind),
 
+            .ptr_usize_type,
+            => return pool.getPointer(allocator, .{
+                .elem_ctype = .usize,
+            }),
+            .ptr_const_comptime_int_type,
+            => return pool.getPointer(allocator, .{
+                .elem_ctype = .void,
+                .@"const" = true,
+            }),
             .manyptr_u8_type,
             => return pool.getPointer(allocator, .{
                 .elem_ctype = .u8,
@@ -1418,11 +1427,6 @@ pub const Pool = struct {
                 .elem_ctype = .u8,
                 .@"const" = true,
             }),
-            .single_const_pointer_to_comptime_int_type,
-            => return pool.getPointer(allocator, .{
-                .elem_ctype = .void,
-                .@"const" = true,
-            }),
             .slice_const_u8_type,
             .slice_const_u8_sentinel_0_type,
             => {
@@ -2157,11 +2161,16 @@ pub const Pool = struct {
             },
 
             .undef,
+            .undef_bool,
+            .undef_usize,
+            .undef_u1,
             .zero,
             .zero_usize,
+            .zero_u1,
             .zero_u8,
             .one,
             .one_usize,
+            .one_u1,
             .one_u8,
             .four_u8,
             .negative_one,
@@ -2172,7 +2181,7 @@ pub const Pool = struct {
             .bool_false,
             .empty_tuple,
             .none,
-            => unreachable,
+            => unreachable, // values, not types
 
             _ => |ip_index| switch (ip.indexToKey(ip_index)) {
                 .int_type => |int_info| return pool.fromIntInfo(allocator, int_info, mod, kind),
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index d2a72502ed..268a57417b 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -36,6 +36,10 @@ const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev;
 
 const Error = error{ OutOfMemory, CodegenFail };
 
+pub fn legalizeFeatures(_: *const std.Target) ?*const Air.Legalize.Features { // the target argument is ignored by this backend
+    return null; // always null, for every target; presumably means no legalization is requested - confirm against the caller
+}
+
 fn subArchName(features: std.Target.Cpu.Feature.Set, arch: anytype, mappings: anytype) ?[]const u8 {
     inline for (mappings) |mapping| {
         if (arch.featureSetHas(features, mapping[0])) return mapping[1];
@@ -3081,10 +3085,11 @@ pub const Object = struct {
             .undefined_type,
             .enum_literal_type,
             => unreachable,
+            .ptr_usize_type,
+            .ptr_const_comptime_int_type,
             .manyptr_u8_type,
             .manyptr_const_u8_type,
             .manyptr_const_u8_sentinel_0_type,
-            .single_const_pointer_to_comptime_int_type,
             => .ptr,
             .slice_const_u8_type,
             .slice_const_u8_sentinel_0_type,
@@ -3098,11 +3103,16 @@ pub const Object = struct {
             => unreachable,
             // values, not types
             .undef,
+            .undef_bool,
+            .undef_usize,
+            .undef_u1,
             .zero,
             .zero_usize,
+            .zero_u1,
             .zero_u8,
             .one,
             .one_usize,
+            .one_u1,
             .one_u8,
             .four_u8,
             .negative_one,
@@ -4959,7 +4969,8 @@ pub const FuncGen = struct {
                 .error_name     => try self.airErrorName(inst),
                 .splat          => try self.airSplat(inst),
                 .select         => try self.airSelect(inst),
-                .shuffle        => try self.airShuffle(inst),
+                .shuffle_one    => try self.airShuffleOne(inst),
+                .shuffle_two    => try self.airShuffleTwo(inst),
                 .aggregate_init => try self.airAggregateInit(inst),
                 .union_init     => try self.airUnionInit(inst),
                 .prefetch       => try self.airPrefetch(inst),
@@ -8917,6 +8928,8 @@ pub const FuncGen = struct {
         const rhs = try self.resolveInst(extra.rhs);
 
         const lhs_ty = self.typeOf(extra.lhs);
+        if (lhs_ty.isVector(zcu) and !self.typeOf(extra.rhs).isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});
         const lhs_scalar_ty = lhs_ty.scalarType(zcu);
 
         const dest_ty = self.typeOfIndex(inst);
@@ -8986,6 +8999,8 @@ pub const FuncGen = struct {
         const rhs = try self.resolveInst(bin_op.rhs);
 
         const lhs_ty = self.typeOf(bin_op.lhs);
+        if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});
         const lhs_scalar_ty = lhs_ty.scalarType(zcu);
 
         const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), "");
@@ -8997,14 +9012,17 @@ pub const FuncGen = struct {
 
     fn airShl(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
         const o = self.ng.object;
+        const zcu = o.pt.zcu;
         const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
 
         const lhs = try self.resolveInst(bin_op.lhs);
         const rhs = try self.resolveInst(bin_op.rhs);
 
-        const lhs_type = self.typeOf(bin_op.lhs);
+        const lhs_ty = self.typeOf(bin_op.lhs);
+        if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});
 
-        const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_type), "");
+        const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), "");
         return self.wip.bin(.shl, lhs, casted_rhs, "");
     }
 
@@ -9023,6 +9041,8 @@ pub const FuncGen = struct {
         const llvm_lhs_scalar_ty = llvm_lhs_ty.scalarType(&o.builder);
 
         const rhs_ty = self.typeOf(bin_op.rhs);
+        if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});
         const rhs_info = rhs_ty.intInfo(zcu);
         assert(rhs_info.signedness == .unsigned);
         const llvm_rhs_ty = try o.lowerType(rhs_ty);
@@ -9095,6 +9115,8 @@ pub const FuncGen = struct {
         const rhs = try self.resolveInst(bin_op.rhs);
 
         const lhs_ty = self.typeOf(bin_op.lhs);
+        if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu))
+            return self.ng.todo("implement vector shifts with scalar rhs", .{});
         const lhs_scalar_ty = lhs_ty.scalarType(zcu);
 
         const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), "");
@@ -9167,11 +9189,7 @@ pub const FuncGen = struct {
             const is_vector = operand_ty.zigTypeTag(zcu) == .vector;
             assert(is_vector == (dest_ty.zigTypeTag(zcu) == .vector));
 
-            const min_panic_id: Zcu.SimplePanicId, const max_panic_id: Zcu.SimplePanicId = id: {
-                if (dest_is_enum) break :id .{ .invalid_enum_value, .invalid_enum_value };
-                if (dest_info.signedness == .unsigned) break :id .{ .negative_to_unsigned, .cast_truncated_data };
-                break :id .{ .cast_truncated_data, .cast_truncated_data };
-            };
+            const panic_id: Zcu.SimplePanicId = if (dest_is_enum) .invalid_enum_value else .integer_out_of_bounds;
 
             if (have_min_check) {
                 const min_const_scalar = try minIntConst(&o.builder, dest_scalar, operand_scalar_llvm_ty, zcu);
@@ -9185,7 +9203,7 @@ pub const FuncGen = struct {
                 const ok_block = try fg.wip.block(1, "IntMinOk");
                 _ = try fg.wip.brCond(ok, ok_block, fail_block, .none);
                 fg.wip.cursor = .{ .block = fail_block };
-                try fg.buildSimplePanic(min_panic_id);
+                try fg.buildSimplePanic(panic_id);
                 fg.wip.cursor = .{ .block = ok_block };
             }
 
@@ -9201,7 +9219,7 @@ pub const FuncGen = struct {
                 const ok_block = try fg.wip.block(1, "IntMaxOk");
                 _ = try fg.wip.brCond(ok, ok_block, fail_block, .none);
                 fg.wip.cursor = .{ .block = fail_block };
-                try fg.buildSimplePanic(max_panic_id);
+                try fg.buildSimplePanic(panic_id);
                 fg.wip.cursor = .{ .block = ok_block };
             }
         }
@@ -9249,8 +9267,6 @@ pub const FuncGen = struct {
         const operand_ty = self.typeOf(ty_op.operand);
         const dest_ty = self.typeOfIndex(inst);
         const target = zcu.getTarget();
-        const dest_bits = dest_ty.floatBits(target);
-        const src_bits = operand_ty.floatBits(target);
 
         if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) {
             return self.wip.cast(.fptrunc, operand, try o.lowerType(dest_ty), "");
@@ -9258,6 +9274,8 @@ pub const FuncGen = struct {
             const operand_llvm_ty = try o.lowerType(operand_ty);
             const dest_llvm_ty = try o.lowerType(dest_ty);
 
+            const dest_bits = dest_ty.floatBits(target);
+            const src_bits = operand_ty.floatBits(target);
             const fn_name = try o.builder.strtabStringFmt("__trunc{s}f{s}f2", .{
                 compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
             });
@@ -9342,11 +9360,12 @@ pub const FuncGen = struct {
             return self.wip.conv(.unsigned, operand, llvm_dest_ty, "");
         }
 
-        if (operand_ty.zigTypeTag(zcu) == .int and inst_ty.isPtrAtRuntime(zcu)) {
+        const operand_scalar_ty = operand_ty.scalarType(zcu);
+        const inst_scalar_ty = inst_ty.scalarType(zcu);
+        if (operand_scalar_ty.zigTypeTag(zcu) == .int and inst_scalar_ty.isPtrAtRuntime(zcu)) {
             return self.wip.cast(.inttoptr, operand, llvm_dest_ty, "");
         }
-
-        if (operand_ty.isPtrAtRuntime(zcu) and inst_ty.zigTypeTag(zcu) == .int) {
+        if (operand_scalar_ty.isPtrAtRuntime(zcu) and inst_scalar_ty.zigTypeTag(zcu) == .int) {
             return self.wip.cast(.ptrtoint, operand, llvm_dest_ty, "");
         }
 
@@ -9644,7 +9663,7 @@ pub const FuncGen = struct {
         const zcu = o.pt.zcu;
         const ip = &zcu.intern_pool;
         for (body_tail[1..]) |body_inst| {
-            switch (fg.liveness.categorizeOperand(fg.air, body_inst, body_tail[0], ip)) {
+            switch (fg.liveness.categorizeOperand(fg.air, zcu, body_inst, body_tail[0], ip)) {
                 .none => continue,
                 .write, .noret, .complex => return false,
                 .tomb => return true,
@@ -10399,42 +10418,192 @@ pub const FuncGen = struct {
         return self.wip.select(.normal, pred, a, b, "");
     }
 
-    fn airShuffle(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
-        const o = self.ng.object;
+    fn airShuffleOne(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
+        const o = fg.ng.object;
         const pt = o.pt;
         const zcu = pt.zcu;
-        const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
-        const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
-        const a = try self.resolveInst(extra.a);
-        const b = try self.resolveInst(extra.b);
-        const mask = Value.fromInterned(extra.mask);
-        const mask_len = extra.mask_len;
-        const a_len = self.typeOf(extra.a).vectorLen(zcu);
-
-        // LLVM uses integers larger than the length of the first array to
-        // index into the second array. This was deemed unnecessarily fragile
-        // when changing code, so Zig uses negative numbers to index the
-        // second vector. These start at -1 and go down, and are easiest to use
-        // with the ~ operator. Here we convert between the two formats.
-        const values = try self.gpa.alloc(Builder.Constant, mask_len);
-        defer self.gpa.free(values);
-
-        for (values, 0..) |*val, i| {
-            const elem = try mask.elemValue(pt, i);
-            if (elem.isUndef(zcu)) {
-                val.* = try o.builder.undefConst(.i32);
-            } else {
-                const int = elem.toSignedInt(zcu);
-                const unsigned: u32 = @intCast(if (int >= 0) int else ~int + a_len);
-                val.* = try o.builder.intConst(.i32, unsigned);
+        const gpa = zcu.gpa;
+
+        const unwrapped = fg.air.unwrapShuffleOne(zcu, inst);
+
+        const operand = try fg.resolveInst(unwrapped.operand);
+        const mask = unwrapped.mask;
+        const operand_ty = fg.typeOf(unwrapped.operand);
+        const llvm_operand_ty = try o.lowerType(operand_ty);
+        const llvm_result_ty = try o.lowerType(unwrapped.result_ty);
+        const llvm_elem_ty = try o.lowerType(unwrapped.result_ty.childType(zcu));
+        const llvm_poison_elem = try o.builder.poisonConst(llvm_elem_ty);
+        const llvm_poison_mask_elem = try o.builder.poisonConst(.i32);
+        const llvm_mask_ty = try o.builder.vectorType(.normal, @intCast(mask.len), .i32);
+
+        // LLVM requires that the two input vectors have the same length, so lowering isn't trivial.
+        // And, in the words of jacobly0: "llvm sucks at shuffles so we do have to hold its hand at
+        // least a bit". So, there are two cases here.
+        //
+        // If the operand length equals the mask length, we do just the one `shufflevector`, where
+        // the second operand is a constant vector with comptime-known elements at the right indices
+        // and poison values elsewhere (in the indices which won't be selected).
+        //
+        // Otherwise, we lower to *two* `shufflevector` instructions. The first shuffles the runtime
+        // operand with an all-poison vector to extract and correctly position all of the runtime
+        // elements. We also make a constant vector with all of the comptime elements correctly
+        // positioned. Then, our second instruction selects elements from those "runtime-or-poison"
+        // and "comptime-or-poison" vectors to compute the result.
+
+        // This buffer is used primarily for the mask constants.
+        const llvm_elem_buf = try gpa.alloc(Builder.Constant, mask.len);
+        defer gpa.free(llvm_elem_buf);
+
+        // ...but first, we'll collect all of the comptime-known values.
+        var any_defined_comptime_value = false;
+        for (mask, llvm_elem_buf) |mask_elem, *llvm_elem| {
+            llvm_elem.* = switch (mask_elem.unwrap()) {
+                .elem => llvm_poison_elem,
+                .value => |val| if (!Value.fromInterned(val).isUndef(zcu)) elem: {
+                    any_defined_comptime_value = true;
+                    break :elem try o.lowerValue(val);
+                } else llvm_poison_elem,
+            };
+        }
+        // This vector is like the result, but runtime elements are replaced with poison.
+        const comptime_and_poison: Builder.Value = if (any_defined_comptime_value) vec: {
+            break :vec try o.builder.vectorValue(llvm_result_ty, llvm_elem_buf);
+        } else try o.builder.poisonValue(llvm_result_ty);
+
+        if (operand_ty.vectorLen(zcu) == mask.len) {
+            // input length equals mask/output length, so we lower to one instruction
+            for (mask, llvm_elem_buf, 0..) |mask_elem, *llvm_elem, elem_idx| {
+                llvm_elem.* = switch (mask_elem.unwrap()) {
+                    .elem => |idx| try o.builder.intConst(.i32, idx),
+                    .value => |val| if (!Value.fromInterned(val).isUndef(zcu)) mask_val: {
+                        break :mask_val try o.builder.intConst(.i32, mask.len + elem_idx);
+                    } else llvm_poison_mask_elem,
+                };
             }
+            return fg.wip.shuffleVector(
+                operand,
+                comptime_and_poison,
+                try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf),
+                "",
+            );
+        }
+
+        for (mask, llvm_elem_buf) |mask_elem, *llvm_elem| {
+            llvm_elem.* = switch (mask_elem.unwrap()) {
+                .elem => |idx| try o.builder.intConst(.i32, idx),
+                .value => llvm_poison_mask_elem,
+            };
         }
+        // This vector is like our result, but all comptime-known elements are poison.
+        const runtime_and_poison = try fg.wip.shuffleVector(
+            operand,
+            try o.builder.poisonValue(llvm_operand_ty),
+            try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf),
+            "",
+        );
 
-        const llvm_mask_value = try o.builder.vectorValue(
-            try o.builder.vectorType(.normal, mask_len, .i32),
-            values,
+        if (!any_defined_comptime_value) {
+            // `comptime_and_poison` is just poison; a second shuffle would be a nop.
+            return runtime_and_poison;
+        }
+
+        // In this second shuffle, the inputs, the mask, and the output all have the same length.
+        for (mask, llvm_elem_buf, 0..) |mask_elem, *llvm_elem, elem_idx| {
+            llvm_elem.* = switch (mask_elem.unwrap()) {
+                .elem => try o.builder.intConst(.i32, elem_idx),
+                .value => |val| if (!Value.fromInterned(val).isUndef(zcu)) mask_val: {
+                    break :mask_val try o.builder.intConst(.i32, mask.len + elem_idx);
+                } else llvm_poison_mask_elem,
+            };
+        }
+        // Merge the runtime and comptime elements with the mask we just built.
+        return fg.wip.shuffleVector(
+            runtime_and_poison,
+            comptime_and_poison,
+            try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf),
+            "",
+        );
+    }
+
+    fn airShuffleTwo(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
+        const o = fg.ng.object;
+        const pt = o.pt;
+        const zcu = pt.zcu;
+        const gpa = zcu.gpa;
+
+        const unwrapped = fg.air.unwrapShuffleTwo(zcu, inst);
+
+        const mask = unwrapped.mask;
+        const llvm_elem_ty = try o.lowerType(unwrapped.result_ty.childType(zcu));
+        const llvm_mask_ty = try o.builder.vectorType(.normal, @intCast(mask.len), .i32);
+        const llvm_poison_mask_elem = try o.builder.poisonConst(.i32);
+
+        // This is kind of simpler than in `airShuffleOne`. We extend the shorter vector to the
+        // length of the longer one with an initial `shufflevector` if necessary, and then do the
+        // actual computation with a second `shufflevector`.
+
+        const operand_a_len = fg.typeOf(unwrapped.operand_a).vectorLen(zcu);
+        const operand_b_len = fg.typeOf(unwrapped.operand_b).vectorLen(zcu);
+        const operand_len: u32 = @max(operand_a_len, operand_b_len);
+
+        // If we need to extend an operand, this is the type that the extension mask will have.
+        const llvm_operand_mask_ty = try o.builder.vectorType(.normal, operand_len, .i32);
+
+        const llvm_elem_buf = try gpa.alloc(Builder.Constant, @max(mask.len, operand_len));
+        defer gpa.free(llvm_elem_buf);
+
+        const operand_a: Builder.Value = extend: {
+            const raw = try fg.resolveInst(unwrapped.operand_a);
+            if (operand_a_len == operand_len) break :extend raw;
+            // Extend with a `shufflevector`, with a mask `<0, 1, ..., operand_a_len - 1, poison, ..., poison>`
+            const mask_elems = llvm_elem_buf[0..operand_len];
+            for (mask_elems[0..operand_a_len], 0..) |*llvm_elem, elem_idx| {
+                llvm_elem.* = try o.builder.intConst(.i32, elem_idx);
+            }
+            @memset(mask_elems[operand_a_len..], llvm_poison_mask_elem);
+            const llvm_this_operand_ty = try o.builder.vectorType(.normal, operand_a_len, llvm_elem_ty);
+            break :extend try fg.wip.shuffleVector(
+                raw,
+                try o.builder.poisonValue(llvm_this_operand_ty),
+                try o.builder.vectorValue(llvm_operand_mask_ty, mask_elems),
+                "",
+            );
+        };
+        const operand_b: Builder.Value = extend: {
+            const raw = try fg.resolveInst(unwrapped.operand_b);
+            if (operand_b_len == operand_len) break :extend raw;
+            // Extend with a `shufflevector`, with a mask `<0, 1, ..., operand_b_len - 1, poison, ..., poison>`
+            const mask_elems = llvm_elem_buf[0..operand_len];
+            for (mask_elems[0..operand_b_len], 0..) |*llvm_elem, elem_idx| {
+                llvm_elem.* = try o.builder.intConst(.i32, elem_idx);
+            }
+            @memset(mask_elems[operand_b_len..], llvm_poison_mask_elem);
+            const llvm_this_operand_ty = try o.builder.vectorType(.normal, operand_b_len, llvm_elem_ty);
+            break :extend try fg.wip.shuffleVector(
+                raw,
+                try o.builder.poisonValue(llvm_this_operand_ty),
+                try o.builder.vectorValue(llvm_operand_mask_ty, mask_elems),
+                "",
+            );
+        };
+
+        // `operand_a` and `operand_b` now have the same length (we've extended the shorter one with
+        // an initial shuffle if necessary). Now for the easy bit.
+
+        const mask_elems = llvm_elem_buf[0..mask.len];
+        for (mask, mask_elems) |mask_elem, *llvm_mask_elem| {
+            llvm_mask_elem.* = switch (mask_elem.unwrap()) {
+                .a_elem => |idx| try o.builder.intConst(.i32, idx),
+                .b_elem => |idx| try o.builder.intConst(.i32, operand_len + idx),
+                .undef => llvm_poison_mask_elem,
+            };
+        }
+        return fg.wip.shuffleVector(
+            operand_a,
+            operand_b,
+            try o.builder.vectorValue(llvm_mask_ty, mask_elems),
+            "",
         );
-        return self.wip.shuffleVector(a, b, llvm_mask_value, "");
     }
 
     /// Reduce a vector by repeatedly applying `llvm_fn` to produce an accumulated result.
diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig
index 5041634a75..f83c6979ff 100644
--- a/src/codegen/spirv.zig
+++ b/src/codegen/spirv.zig
@@ -28,6 +28,15 @@ const SpvAssembler = @import("spirv/Assembler.zig");
 
 const InstMap = std.AutoHashMapUnmanaged(Air.Inst.Index, IdRef);
 
+pub fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features { // the target argument is discarded: every target gets the same feature set
+    return comptime &.initMany(&.{ // the set is evaluated at comptime and returned by pointer
+        .expand_intcast_safe, // NOTE(review): presumably these four ask Air.Legalize to expand the safety-checked ops before this backend sees them — confirm against Air/Legalize.zig
+        .expand_add_safe,
+        .expand_sub_safe,
+        .expand_mul_safe,
+    });
+}
+
 pub const zig_call_abi_ver = 3;
 pub const big_int_bits = 32;
 
@@ -3243,7 +3252,8 @@ const NavGen = struct {
 
             .splat => try self.airSplat(inst),
             .reduce, .reduce_optimized => try self.airReduce(inst),
-            .shuffle                   => try self.airShuffle(inst),
+            .shuffle_one               => try self.airShuffleOne(inst),
+            .shuffle_two               => try self.airShuffleTwo(inst),
 
             .ptr_add => try self.airPtrAdd(inst),
             .ptr_sub => try self.airPtrSub(inst),
@@ -3380,6 +3390,10 @@ const NavGen = struct {
         const zcu = self.pt.zcu;
         const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
 
+        if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu)) {
+            return self.fail("vector shift with scalar rhs", .{});
+        }
+
         const base = try self.temporary(bin_op.lhs);
         const shift = try self.temporary(bin_op.rhs);
 
@@ -3866,6 +3880,10 @@ const NavGen = struct {
         const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
         const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
 
+        if (self.typeOf(extra.lhs).isVector(zcu) and !self.typeOf(extra.rhs).isVector(zcu)) {
+            return self.fail("vector shift with scalar rhs", .{});
+        }
+
         const base = try self.temporary(extra.lhs);
         const shift = try self.temporary(extra.rhs);
 
@@ -4030,40 +4048,57 @@ const NavGen = struct {
         return result_id;
     }
 
-    fn airShuffle(self: *NavGen, inst: Air.Inst.Index) !?IdRef {
-        const pt = self.pt;
+    fn airShuffleOne(ng: *NavGen, inst: Air.Inst.Index) !?IdRef { // lowers AIR `shuffle_one`: each result element comes from the single operand or from a value stored in the mask
+        const pt = ng.pt;
         const zcu = pt.zcu;
-        const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
-        const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
-        const a = try self.resolve(extra.a);
-        const b = try self.resolve(extra.b);
-        const mask = Value.fromInterned(extra.mask);
+        const gpa = zcu.gpa;
 
-        // Note: number of components in the result, a, and b may differ.
-        const result_ty = self.typeOfIndex(inst);
-        const scalar_ty = result_ty.scalarType(zcu);
-        const scalar_ty_id = try self.resolveType(scalar_ty, .direct);
+        const unwrapped = ng.air.unwrapShuffleOne(zcu, inst); // yields the operand, the mask and the result type used below
+        const mask = unwrapped.mask;
+        const result_ty = unwrapped.result_ty;
+        const elem_ty = result_ty.childType(zcu);
+        const operand = try ng.resolve(unwrapped.operand);
 
-        const constituents = try self.gpa.alloc(IdRef, result_ty.vectorLen(zcu));
-        defer self.gpa.free(constituents);
+        const constituents = try gpa.alloc(IdRef, mask.len); // one id per mask element
+        defer gpa.free(constituents); // scratch buffer only; released on every exit path
 
-        for (constituents, 0..) |*id, i| {
-            const elem = try mask.elemValue(pt, i);
-            if (elem.isUndef(zcu)) {
-                id.* = try self.spv.constUndef(scalar_ty_id);
-                continue;
-            }
+        for (constituents, mask) |*id, mask_elem| {
+            id.* = switch (mask_elem.unwrap()) {
+                .elem => |idx| try ng.extractVectorComponent(elem_ty, operand, idx), // mask selects element `idx` of the operand
+                .value => |val| try ng.constant(elem_ty, .fromInterned(val), .direct), // mask carries an interned value; emit it as a constant of the element type
+            };
+        }
 
-            const index = elem.toSignedInt(zcu);
-            if (index >= 0) {
-                id.* = try self.extractVectorComponent(scalar_ty, a, @intCast(index));
-            } else {
-                id.* = try self.extractVectorComponent(scalar_ty, b, @intCast(~index));
-            }
+        const result_ty_id = try ng.resolveType(result_ty, .direct);
+        return try ng.constructComposite(result_ty_id, constituents); // assemble the per-element ids into a value of the result type
+    }
+
+    fn airShuffleTwo(ng: *NavGen, inst: Air.Inst.Index) !?IdRef { // lowers AIR `shuffle_two`: each result element comes from operand a, operand b, or is undef
+        const pt = ng.pt;
+        const zcu = pt.zcu;
+        const gpa = zcu.gpa;
+
+        const unwrapped = ng.air.unwrapShuffleTwo(zcu, inst); // yields both operands, the mask and the result type used below
+        const mask = unwrapped.mask;
+        const result_ty = unwrapped.result_ty;
+        const elem_ty = result_ty.childType(zcu);
+        const elem_ty_id = try ng.resolveType(elem_ty, .direct); // only needed for the undef constant below
+        const operand_a = try ng.resolve(unwrapped.operand_a);
+        const operand_b = try ng.resolve(unwrapped.operand_b);
+
+        const constituents = try gpa.alloc(IdRef, mask.len); // one id per mask element
+        defer gpa.free(constituents); // scratch buffer only; released on every exit path
+
+        for (constituents, mask) |*id, mask_elem| {
+            id.* = switch (mask_elem.unwrap()) {
+                .a_elem => |idx| try ng.extractVectorComponent(elem_ty, operand_a, idx), // element `idx` of operand a
+                .b_elem => |idx| try ng.extractVectorComponent(elem_ty, operand_b, idx), // element `idx` of operand b; the index is used directly, with no offset by a's length
+                .undef => try ng.spv.constUndef(elem_ty_id), // undef mask element: use an undef constant of the element type
+            };
         }
 
-        const result_ty_id = try self.resolveType(result_ty, .direct);
-        return try self.constructComposite(result_ty_id, constituents);
+        const result_ty_id = try ng.resolveType(result_ty, .direct);
+        return try ng.constructComposite(result_ty_id, constituents); // assemble the per-element ids into a value of the result type
     }
 
     fn indicesToIds(self: *NavGen, indices: []const u32) ![]IdRef {
author	Jacob Young <15544577+jacobly0@users.noreply.github.com>	2025-06-01 22:02:34 -0400
committer	GitHub <noreply@github.com>	2025-06-01 22:02:34 -0400
commit	8dbd29cc4588cf118532a816d74b78f62999b636 (patch)
tree	0fc19e694d1ad7366dd2b7153dd0d4647d3a9159 /src/codegen
parent	0386730777da858908aaba4ef96fb5bd48faafc9 (diff)
parent	6a63c8653ae9121f1cbcee49d32ec4f8deaf0b65 (diff)
download	zig-8dbd29cc4588cf118532a816d74b78f62999b636.tar.gz zig-8dbd29cc4588cf118532a816d74b78f62999b636.zip