diff options
| author | Jacob Young <15544577+jacobly0@users.noreply.github.com> | 2025-06-01 22:02:34 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-06-01 22:02:34 -0400 |
| commit | 8dbd29cc4588cf118532a816d74b78f62999b636 (patch) | |
| tree | 0fc19e694d1ad7366dd2b7153dd0d4647d3a9159 /src/codegen | |
| parent | 0386730777da858908aaba4ef96fb5bd48faafc9 (diff) | |
| parent | 6a63c8653ae9121f1cbcee49d32ec4f8deaf0b65 (diff) | |
| download | zig-8dbd29cc4588cf118532a816d74b78f62999b636.tar.gz zig-8dbd29cc4588cf118532a816d74b78f62999b636.zip | |
Merge pull request #24011 from jacobly0/legalize-unary
Legalize: implement scalarization and safety check expansion
Diffstat (limited to 'src/codegen')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/codegen/c.zig | 106 |
| -rw-r--r-- | src/codegen/c/Type.zig | 21 |
| -rw-r--r-- | src/codegen/llvm.zig | 263 |
| -rw-r--r-- | src/codegen/spirv.zig | 91 |
4 files changed, 374 insertions, 107 deletions
diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 8539efdbfe..c68abc06ce 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -4,6 +4,7 @@ const assert = std.debug.assert; const mem = std.mem; const log = std.log.scoped(.c); +const dev = @import("../dev.zig"); const link = @import("../link.zig"); const Zcu = @import("../Zcu.zig"); const Module = @import("../Package/Module.zig"); @@ -20,6 +21,15 @@ const Alignment = InternPool.Alignment; const BigIntLimb = std.math.big.Limb; const BigInt = std.math.big.int; +pub fn legalizeFeatures(_: *const std.Target) ?*const Air.Legalize.Features { + return if (dev.env.supports(.legalize)) comptime &.initMany(&.{ + .expand_intcast_safe, + .expand_add_safe, + .expand_sub_safe, + .expand_mul_safe, + }) else null; // we don't currently ask zig1 to use safe optimization modes +} + pub const CType = @import("c/Type.zig"); pub const CValue = union(enum) { @@ -206,7 +216,6 @@ const reserved_idents = std.StaticStringMap(void).initComptime(.{ .{ "atomic_ushort", {} }, .{ "atomic_wchar_t", {} }, .{ "auto", {} }, - .{ "bool", {} }, .{ "break", {} }, .{ "case", {} }, .{ "char", {} }, @@ -266,6 +275,11 @@ const reserved_idents = std.StaticStringMap(void).initComptime(.{ .{ "va_end", {} }, .{ "va_copy", {} }, + // stdbool.h + .{ "bool", {} }, + .{ "false", {} }, + .{ "true", {} }, + // stddef.h .{ "offsetof", {} }, @@ -1591,7 +1605,7 @@ pub const DeclGen = struct { try writer.writeAll("(("); try dg.renderCType(writer, ctype); return writer.print("){x})", .{ - try dg.fmtIntLiteral(try pt.undefValue(.usize), .Other), + try dg.fmtIntLiteral(.undef_usize, .Other), }); }, .slice => { @@ -1605,7 +1619,7 @@ pub const DeclGen = struct { const ptr_ty = ty.slicePtrFieldType(zcu); try dg.renderType(writer, ptr_ty); return writer.print("){x}, {0x}}}", .{ - try dg.fmtIntLiteral(try dg.pt.undefValue(.usize), .Other), + try dg.fmtIntLiteral(.undef_usize, .Other), }); }, }, @@ -3360,7 +3374,8 @@ fn genBodyInner(f: *Function, body: []const 
Air.Inst.Index) error{ AnalysisFail, .error_name => try airErrorName(f, inst), .splat => try airSplat(f, inst), .select => try airSelect(f, inst), - .shuffle => try airShuffle(f, inst), + .shuffle_one => try airShuffleOne(f, inst), + .shuffle_two => try airShuffleTwo(f, inst), .reduce => try airReduce(f, inst), .aggregate_init => try airAggregateInit(f, inst), .union_init => try airUnionInit(f, inst), @@ -4179,7 +4194,7 @@ fn airOverflow(f: *Function, inst: Air.Inst.Index, operation: []const u8, info: try v.elem(f, w); try w.writeAll(", "); try f.writeCValue(w, rhs, .FunctionArgument); - try v.elem(f, w); + if (f.typeOf(bin_op.rhs).isVector(zcu)) try v.elem(f, w); try f.object.dg.renderBuiltinInfo(w, scalar_ty, info); try w.writeAll(");\n"); try v.end(f, inst, w); @@ -6376,7 +6391,7 @@ fn airArrayToSlice(f: *Function, inst: Air.Inst.Index) !CValue { if (operand_child_ctype.info(ctype_pool) == .array) { try writer.writeByte('&'); try f.writeCValueDeref(writer, operand); - try writer.print("[{}]", .{try f.fmtIntLiteral(try pt.intValue(.usize, 0))}); + try writer.print("[{}]", .{try f.fmtIntLiteral(.zero_usize)}); } else try f.writeCValue(writer, operand, .Other); } try a.end(f, writer); @@ -6536,7 +6551,7 @@ fn airBinBuiltinCall( try v.elem(f, writer); try writer.writeAll(", "); try f.writeCValue(writer, rhs, .FunctionArgument); - try v.elem(f, writer); + if (f.typeOf(bin_op.rhs).isVector(zcu)) try v.elem(f, writer); try f.object.dg.renderBuiltinInfo(writer, scalar_ty, info); try writer.writeAll(");\n"); try v.end(f, inst, writer); @@ -6907,7 +6922,7 @@ fn airMemset(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { try writer.writeAll("for ("); try f.writeCValue(writer, index, .Other); try writer.writeAll(" = "); - try f.object.dg.renderValue(writer, try pt.intValue(.usize, 0), .Other); + try f.object.dg.renderValue(writer, .zero_usize, .Other); try writer.writeAll("; "); try f.writeCValue(writer, index, .Other); try writer.writeAll(" != "); @@ -7149,34 
+7164,73 @@ fn airSelect(f: *Function, inst: Air.Inst.Index) !CValue { return local; } -fn airShuffle(f: *Function, inst: Air.Inst.Index) !CValue { +fn airShuffleOne(f: *Function, inst: Air.Inst.Index) !CValue { const pt = f.object.dg.pt; const zcu = pt.zcu; - const ty_pl = f.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const extra = f.air.extraData(Air.Shuffle, ty_pl.payload).data; - const mask = Value.fromInterned(extra.mask); - const lhs = try f.resolveInst(extra.a); - const rhs = try f.resolveInst(extra.b); - - const inst_ty = f.typeOfIndex(inst); + const unwrapped = f.air.unwrapShuffleOne(zcu, inst); + const mask = unwrapped.mask; + const operand = try f.resolveInst(unwrapped.operand); + const inst_ty = unwrapped.result_ty; const writer = f.object.writer(); const local = try f.allocLocal(inst, inst_ty); - try reap(f, inst, &.{ extra.a, extra.b }); // local cannot alias operands - for (0..extra.mask_len) |index| { + try reap(f, inst, &.{unwrapped.operand}); // local cannot alias operand + for (mask, 0..) 
|mask_elem, out_idx| { try f.writeCValue(writer, local, .Other); try writer.writeByte('['); - try f.object.dg.renderValue(writer, try pt.intValue(.usize, index), .Other); + try f.object.dg.renderValue(writer, try pt.intValue(.usize, out_idx), .Other); try writer.writeAll("] = "); + switch (mask_elem.unwrap()) { + .elem => |src_idx| { + try f.writeCValue(writer, operand, .Other); + try writer.writeByte('['); + try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other); + try writer.writeByte(']'); + }, + .value => |val| try f.object.dg.renderValue(writer, .fromInterned(val), .Other), + } + try writer.writeAll(";\n"); + } - const mask_elem = (try mask.elemValue(pt, index)).toSignedInt(zcu); - const src_val = try pt.intValue(.usize, @as(u64, @intCast(mask_elem ^ mask_elem >> 63))); + return local; +} + +fn airShuffleTwo(f: *Function, inst: Air.Inst.Index) !CValue { + const pt = f.object.dg.pt; + const zcu = pt.zcu; + + const unwrapped = f.air.unwrapShuffleTwo(zcu, inst); + const mask = unwrapped.mask; + const operand_a = try f.resolveInst(unwrapped.operand_a); + const operand_b = try f.resolveInst(unwrapped.operand_b); + const inst_ty = unwrapped.result_ty; + const elem_ty = inst_ty.childType(zcu); - try f.writeCValue(writer, if (mask_elem >= 0) lhs else rhs, .Other); + const writer = f.object.writer(); + const local = try f.allocLocal(inst, inst_ty); + try reap(f, inst, &.{ unwrapped.operand_a, unwrapped.operand_b }); // local cannot alias operands + for (mask, 0..) 
|mask_elem, out_idx| { + try f.writeCValue(writer, local, .Other); try writer.writeByte('['); - try f.object.dg.renderValue(writer, src_val, .Other); - try writer.writeAll("];\n"); + try f.object.dg.renderValue(writer, try pt.intValue(.usize, out_idx), .Other); + try writer.writeAll("] = "); + switch (mask_elem.unwrap()) { + .a_elem => |src_idx| { + try f.writeCValue(writer, operand_a, .Other); + try writer.writeByte('['); + try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other); + try writer.writeByte(']'); + }, + .b_elem => |src_idx| { + try f.writeCValue(writer, operand_b, .Other); + try writer.writeByte('['); + try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other); + try writer.writeByte(']'); + }, + .undef => try f.object.dg.renderUndefValue(writer, elem_ty, .Other), + } + try writer.writeAll(";\n"); } return local; @@ -8311,11 +8365,11 @@ const Vectorize = struct { try writer.writeAll("for ("); try f.writeCValue(writer, local, .Other); - try writer.print(" = {d}; ", .{try f.fmtIntLiteral(try pt.intValue(.usize, 0))}); + try writer.print(" = {d}; ", .{try f.fmtIntLiteral(.zero_usize)}); try f.writeCValue(writer, local, .Other); try writer.print(" < {d}; ", .{try f.fmtIntLiteral(try pt.intValue(.usize, ty.vectorLen(zcu)))}); try f.writeCValue(writer, local, .Other); - try writer.print(" += {d}) {{\n", .{try f.fmtIntLiteral(try pt.intValue(.usize, 1))}); + try writer.print(" += {d}) {{\n", .{try f.fmtIntLiteral(.one_usize)}); f.object.indent_writer.pushIndent(); break :index .{ .index = local }; diff --git a/src/codegen/c/Type.zig b/src/codegen/c/Type.zig index 7d3a485e2a..e5901ec626 100644 --- a/src/codegen/c/Type.zig +++ b/src/codegen/c/Type.zig @@ -1408,6 +1408,15 @@ pub const Pool = struct { .bits = pt.zcu.errorSetBits(), }, mod, kind), + .ptr_usize_type, + => return pool.getPointer(allocator, .{ + .elem_ctype = .usize, + }), + .ptr_const_comptime_int_type, + => return pool.getPointer(allocator, .{ + .elem_ctype 
= .void, + .@"const" = true, + }), .manyptr_u8_type, => return pool.getPointer(allocator, .{ .elem_ctype = .u8, @@ -1418,11 +1427,6 @@ pub const Pool = struct { .elem_ctype = .u8, .@"const" = true, }), - .single_const_pointer_to_comptime_int_type, - => return pool.getPointer(allocator, .{ - .elem_ctype = .void, - .@"const" = true, - }), .slice_const_u8_type, .slice_const_u8_sentinel_0_type, => { @@ -2157,11 +2161,16 @@ pub const Pool = struct { }, .undef, + .undef_bool, + .undef_usize, + .undef_u1, .zero, .zero_usize, + .zero_u1, .zero_u8, .one, .one_usize, + .one_u1, .one_u8, .four_u8, .negative_one, @@ -2172,7 +2181,7 @@ pub const Pool = struct { .bool_false, .empty_tuple, .none, - => unreachable, + => unreachable, // values, not types _ => |ip_index| switch (ip.indexToKey(ip_index)) { .int_type => |int_info| return pool.fromIntInfo(allocator, int_info, mod, kind), diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index d2a72502ed..268a57417b 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -36,6 +36,10 @@ const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev; const Error = error{ OutOfMemory, CodegenFail }; +pub fn legalizeFeatures(_: *const std.Target) ?*const Air.Legalize.Features { + return null; +} + fn subArchName(features: std.Target.Cpu.Feature.Set, arch: anytype, mappings: anytype) ?[]const u8 { inline for (mappings) |mapping| { if (arch.featureSetHas(features, mapping[0])) return mapping[1]; @@ -3081,10 +3085,11 @@ pub const Object = struct { .undefined_type, .enum_literal_type, => unreachable, + .ptr_usize_type, + .ptr_const_comptime_int_type, .manyptr_u8_type, .manyptr_const_u8_type, .manyptr_const_u8_sentinel_0_type, - .single_const_pointer_to_comptime_int_type, => .ptr, .slice_const_u8_type, .slice_const_u8_sentinel_0_type, @@ -3098,11 +3103,16 @@ pub const Object = struct { => unreachable, // values, not types .undef, + .undef_bool, + .undef_usize, + .undef_u1, .zero, .zero_usize, + .zero_u1, .zero_u8, .one, .one_usize, 
+ .one_u1, .one_u8, .four_u8, .negative_one, @@ -4959,7 +4969,8 @@ pub const FuncGen = struct { .error_name => try self.airErrorName(inst), .splat => try self.airSplat(inst), .select => try self.airSelect(inst), - .shuffle => try self.airShuffle(inst), + .shuffle_one => try self.airShuffleOne(inst), + .shuffle_two => try self.airShuffleTwo(inst), .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), @@ -8917,6 +8928,8 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(extra.rhs); const lhs_ty = self.typeOf(extra.lhs); + if (lhs_ty.isVector(zcu) and !self.typeOf(extra.rhs).isVector(zcu)) + return self.ng.todo("implement vector shifts with scalar rhs", .{}); const lhs_scalar_ty = lhs_ty.scalarType(zcu); const dest_ty = self.typeOfIndex(inst); @@ -8986,6 +8999,8 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(bin_op.rhs); const lhs_ty = self.typeOf(bin_op.lhs); + if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu)) + return self.ng.todo("implement vector shifts with scalar rhs", .{}); const lhs_scalar_ty = lhs_ty.scalarType(zcu); const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), ""); @@ -8997,14 +9012,17 @@ pub const FuncGen = struct { fn airShl(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { const o = self.ng.object; + const zcu = o.pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_type = self.typeOf(bin_op.lhs); + const lhs_ty = self.typeOf(bin_op.lhs); + if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu)) + return self.ng.todo("implement vector shifts with scalar rhs", .{}); - const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_type), ""); + const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), ""); 
return self.wip.bin(.shl, lhs, casted_rhs, ""); } @@ -9023,6 +9041,8 @@ pub const FuncGen = struct { const llvm_lhs_scalar_ty = llvm_lhs_ty.scalarType(&o.builder); const rhs_ty = self.typeOf(bin_op.rhs); + if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu)) + return self.ng.todo("implement vector shifts with scalar rhs", .{}); const rhs_info = rhs_ty.intInfo(zcu); assert(rhs_info.signedness == .unsigned); const llvm_rhs_ty = try o.lowerType(rhs_ty); @@ -9095,6 +9115,8 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(bin_op.rhs); const lhs_ty = self.typeOf(bin_op.lhs); + if (lhs_ty.isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu)) + return self.ng.todo("implement vector shifts with scalar rhs", .{}); const lhs_scalar_ty = lhs_ty.scalarType(zcu); const casted_rhs = try self.wip.conv(.unsigned, rhs, try o.lowerType(lhs_ty), ""); @@ -9167,11 +9189,7 @@ pub const FuncGen = struct { const is_vector = operand_ty.zigTypeTag(zcu) == .vector; assert(is_vector == (dest_ty.zigTypeTag(zcu) == .vector)); - const min_panic_id: Zcu.SimplePanicId, const max_panic_id: Zcu.SimplePanicId = id: { - if (dest_is_enum) break :id .{ .invalid_enum_value, .invalid_enum_value }; - if (dest_info.signedness == .unsigned) break :id .{ .negative_to_unsigned, .cast_truncated_data }; - break :id .{ .cast_truncated_data, .cast_truncated_data }; - }; + const panic_id: Zcu.SimplePanicId = if (dest_is_enum) .invalid_enum_value else .integer_out_of_bounds; if (have_min_check) { const min_const_scalar = try minIntConst(&o.builder, dest_scalar, operand_scalar_llvm_ty, zcu); @@ -9185,7 +9203,7 @@ pub const FuncGen = struct { const ok_block = try fg.wip.block(1, "IntMinOk"); _ = try fg.wip.brCond(ok, ok_block, fail_block, .none); fg.wip.cursor = .{ .block = fail_block }; - try fg.buildSimplePanic(min_panic_id); + try fg.buildSimplePanic(panic_id); fg.wip.cursor = .{ .block = ok_block }; } @@ -9201,7 +9219,7 @@ pub const FuncGen = struct { const ok_block = try fg.wip.block(1, 
"IntMaxOk"); _ = try fg.wip.brCond(ok, ok_block, fail_block, .none); fg.wip.cursor = .{ .block = fail_block }; - try fg.buildSimplePanic(max_panic_id); + try fg.buildSimplePanic(panic_id); fg.wip.cursor = .{ .block = ok_block }; } } @@ -9249,8 +9267,6 @@ pub const FuncGen = struct { const operand_ty = self.typeOf(ty_op.operand); const dest_ty = self.typeOfIndex(inst); const target = zcu.getTarget(); - const dest_bits = dest_ty.floatBits(target); - const src_bits = operand_ty.floatBits(target); if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) { return self.wip.cast(.fptrunc, operand, try o.lowerType(dest_ty), ""); @@ -9258,6 +9274,8 @@ pub const FuncGen = struct { const operand_llvm_ty = try o.lowerType(operand_ty); const dest_llvm_ty = try o.lowerType(dest_ty); + const dest_bits = dest_ty.floatBits(target); + const src_bits = operand_ty.floatBits(target); const fn_name = try o.builder.strtabStringFmt("__trunc{s}f{s}f2", .{ compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits), }); @@ -9342,11 +9360,12 @@ pub const FuncGen = struct { return self.wip.conv(.unsigned, operand, llvm_dest_ty, ""); } - if (operand_ty.zigTypeTag(zcu) == .int and inst_ty.isPtrAtRuntime(zcu)) { + const operand_scalar_ty = operand_ty.scalarType(zcu); + const inst_scalar_ty = inst_ty.scalarType(zcu); + if (operand_scalar_ty.zigTypeTag(zcu) == .int and inst_scalar_ty.isPtrAtRuntime(zcu)) { return self.wip.cast(.inttoptr, operand, llvm_dest_ty, ""); } - - if (operand_ty.isPtrAtRuntime(zcu) and inst_ty.zigTypeTag(zcu) == .int) { + if (operand_scalar_ty.isPtrAtRuntime(zcu) and inst_scalar_ty.zigTypeTag(zcu) == .int) { return self.wip.cast(.ptrtoint, operand, llvm_dest_ty, ""); } @@ -9644,7 +9663,7 @@ pub const FuncGen = struct { const zcu = o.pt.zcu; const ip = &zcu.intern_pool; for (body_tail[1..]) |body_inst| { - switch (fg.liveness.categorizeOperand(fg.air, body_inst, body_tail[0], ip)) { + switch (fg.liveness.categorizeOperand(fg.air, zcu, 
body_inst, body_tail[0], ip)) { .none => continue, .write, .noret, .complex => return false, .tomb => return true, @@ -10399,42 +10418,192 @@ pub const FuncGen = struct { return self.wip.select(.normal, pred, a, b, ""); } - fn airShuffle(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { - const o = self.ng.object; + fn airShuffleOne(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value { + const o = fg.ng.object; const pt = o.pt; const zcu = pt.zcu; - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data; - const a = try self.resolveInst(extra.a); - const b = try self.resolveInst(extra.b); - const mask = Value.fromInterned(extra.mask); - const mask_len = extra.mask_len; - const a_len = self.typeOf(extra.a).vectorLen(zcu); - - // LLVM uses integers larger than the length of the first array to - // index into the second array. This was deemed unnecessarily fragile - // when changing code, so Zig uses negative numbers to index the - // second vector. These start at -1 and go down, and are easiest to use - // with the ~ operator. Here we convert between the two formats. - const values = try self.gpa.alloc(Builder.Constant, mask_len); - defer self.gpa.free(values); - - for (values, 0..) 
|*val, i| { - const elem = try mask.elemValue(pt, i); - if (elem.isUndef(zcu)) { - val.* = try o.builder.undefConst(.i32); - } else { - const int = elem.toSignedInt(zcu); - const unsigned: u32 = @intCast(if (int >= 0) int else ~int + a_len); - val.* = try o.builder.intConst(.i32, unsigned); + const gpa = zcu.gpa; + + const unwrapped = fg.air.unwrapShuffleOne(zcu, inst); + + const operand = try fg.resolveInst(unwrapped.operand); + const mask = unwrapped.mask; + const operand_ty = fg.typeOf(unwrapped.operand); + const llvm_operand_ty = try o.lowerType(operand_ty); + const llvm_result_ty = try o.lowerType(unwrapped.result_ty); + const llvm_elem_ty = try o.lowerType(unwrapped.result_ty.childType(zcu)); + const llvm_poison_elem = try o.builder.poisonConst(llvm_elem_ty); + const llvm_poison_mask_elem = try o.builder.poisonConst(.i32); + const llvm_mask_ty = try o.builder.vectorType(.normal, @intCast(mask.len), .i32); + + // LLVM requires that the two input vectors have the same length, so lowering isn't trivial. + // And, in the words of jacobly0: "llvm sucks at shuffles so we do have to hold its hand at + // least a bit". So, there are two cases here. + // + // If the operand length equals the mask length, we do just the one `shufflevector`, where + // the second operand is a constant vector with comptime-known elements at the right indices + // and poison values elsewhere (in the indices which won't be selected). + // + // Otherwise, we lower to *two* `shufflevector` instructions. The first shuffles the runtime + // operand with an all-poison vector to extract and correctly position all of the runtime + // elements. We also make a constant vector with all of the comptime elements correctly + // positioned. Then, our second instruction selects elements from those "runtime-or-poison" + // and "comptime-or-poison" vectors to compute the result. + + // This buffer is used primarily for the mask constants. 
+ const llvm_elem_buf = try gpa.alloc(Builder.Constant, mask.len); + defer gpa.free(llvm_elem_buf); + + // ...but first, we'll collect all of the comptime-known values. + var any_defined_comptime_value = false; + for (mask, llvm_elem_buf) |mask_elem, *llvm_elem| { + llvm_elem.* = switch (mask_elem.unwrap()) { + .elem => llvm_poison_elem, + .value => |val| if (!Value.fromInterned(val).isUndef(zcu)) elem: { + any_defined_comptime_value = true; + break :elem try o.lowerValue(val); + } else llvm_poison_elem, + }; + } + // This vector is like the result, but runtime elements are replaced with poison. + const comptime_and_poison: Builder.Value = if (any_defined_comptime_value) vec: { + break :vec try o.builder.vectorValue(llvm_result_ty, llvm_elem_buf); + } else try o.builder.poisonValue(llvm_result_ty); + + if (operand_ty.vectorLen(zcu) == mask.len) { + // input length equals mask/output length, so we lower to one instruction + for (mask, llvm_elem_buf, 0..) |mask_elem, *llvm_elem, elem_idx| { + llvm_elem.* = switch (mask_elem.unwrap()) { + .elem => |idx| try o.builder.intConst(.i32, idx), + .value => |val| if (!Value.fromInterned(val).isUndef(zcu)) mask_val: { + break :mask_val try o.builder.intConst(.i32, mask.len + elem_idx); + } else llvm_poison_mask_elem, + }; } + return fg.wip.shuffleVector( + operand, + comptime_and_poison, + try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf), + "", + ); + } + + for (mask, llvm_elem_buf) |mask_elem, *llvm_elem| { + llvm_elem.* = switch (mask_elem.unwrap()) { + .elem => |idx| try o.builder.intConst(.i32, idx), + .value => llvm_poison_mask_elem, + }; } + // This vector is like our result, but all comptime-known elements are poison. 
+ const runtime_and_poison = try fg.wip.shuffleVector( + operand, + try o.builder.poisonValue(llvm_operand_ty), + try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf), + "", + ); - const llvm_mask_value = try o.builder.vectorValue( - try o.builder.vectorType(.normal, mask_len, .i32), - values, + if (!any_defined_comptime_value) { + // `comptime_and_poison` is just poison; a second shuffle would be a nop. + return runtime_and_poison; + } + + // In this second shuffle, the inputs, the mask, and the output all have the same length. + for (mask, llvm_elem_buf, 0..) |mask_elem, *llvm_elem, elem_idx| { + llvm_elem.* = switch (mask_elem.unwrap()) { + .elem => try o.builder.intConst(.i32, elem_idx), + .value => |val| if (!Value.fromInterned(val).isUndef(zcu)) mask_val: { + break :mask_val try o.builder.intConst(.i32, mask.len + elem_idx); + } else llvm_poison_mask_elem, + }; + } + // Merge the runtime and comptime elements with the mask we just built. + return fg.wip.shuffleVector( + runtime_and_poison, + comptime_and_poison, + try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf), + "", + ); + } + + fn airShuffleTwo(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value { + const o = fg.ng.object; + const pt = o.pt; + const zcu = pt.zcu; + const gpa = zcu.gpa; + + const unwrapped = fg.air.unwrapShuffleTwo(zcu, inst); + + const mask = unwrapped.mask; + const llvm_elem_ty = try o.lowerType(unwrapped.result_ty.childType(zcu)); + const llvm_mask_ty = try o.builder.vectorType(.normal, @intCast(mask.len), .i32); + const llvm_poison_mask_elem = try o.builder.poisonConst(.i32); + + // This is kind of simpler than in `airShuffleOne`. We extend the shorter vector to the + // length of the longer one with an initial `shufflevector` if necessary, and then do the + // actual computation with a second `shufflevector`. 
+ + const operand_a_len = fg.typeOf(unwrapped.operand_a).vectorLen(zcu); + const operand_b_len = fg.typeOf(unwrapped.operand_b).vectorLen(zcu); + const operand_len: u32 = @max(operand_a_len, operand_b_len); + + // If we need to extend an operand, this is the type that mask will have. + const llvm_operand_mask_ty = try o.builder.vectorType(.normal, operand_len, .i32); + + const llvm_elem_buf = try gpa.alloc(Builder.Constant, @max(mask.len, operand_len)); + defer gpa.free(llvm_elem_buf); + + const operand_a: Builder.Value = extend: { + const raw = try fg.resolveInst(unwrapped.operand_a); + if (operand_a_len == operand_len) break :extend raw; + // Extend with a `shufflevector`, with a mask `<0, 1, ..., n, poison, poison, ..., poison>` + const mask_elems = llvm_elem_buf[0..operand_len]; + for (mask_elems[0..operand_a_len], 0..) |*llvm_elem, elem_idx| { + llvm_elem.* = try o.builder.intConst(.i32, elem_idx); + } + @memset(mask_elems[operand_a_len..], llvm_poison_mask_elem); + const llvm_this_operand_ty = try o.builder.vectorType(.normal, operand_a_len, llvm_elem_ty); + break :extend try fg.wip.shuffleVector( + raw, + try o.builder.poisonValue(llvm_this_operand_ty), + try o.builder.vectorValue(llvm_operand_mask_ty, mask_elems), + "", + ); + }; + const operand_b: Builder.Value = extend: { + const raw = try fg.resolveInst(unwrapped.operand_b); + if (operand_b_len == operand_len) break :extend raw; + // Extend with a `shufflevector`, with a mask `<0, 1, ..., n, poison, poison, ..., poison>` + const mask_elems = llvm_elem_buf[0..operand_len]; + for (mask_elems[0..operand_b_len], 0..) 
|*llvm_elem, elem_idx| { + llvm_elem.* = try o.builder.intConst(.i32, elem_idx); + } + @memset(mask_elems[operand_b_len..], llvm_poison_mask_elem); + const llvm_this_operand_ty = try o.builder.vectorType(.normal, operand_b_len, llvm_elem_ty); + break :extend try fg.wip.shuffleVector( + raw, + try o.builder.poisonValue(llvm_this_operand_ty), + try o.builder.vectorValue(llvm_operand_mask_ty, mask_elems), + "", + ); + }; + + // `operand_a` and `operand_b` now have the same length (we've extended the shorter one with + // an initial shuffle if necessary). Now for the easy bit. + + const mask_elems = llvm_elem_buf[0..mask.len]; + for (mask, mask_elems) |mask_elem, *llvm_mask_elem| { + llvm_mask_elem.* = switch (mask_elem.unwrap()) { + .a_elem => |idx| try o.builder.intConst(.i32, idx), + .b_elem => |idx| try o.builder.intConst(.i32, operand_len + idx), + .undef => llvm_poison_mask_elem, + }; + } + return fg.wip.shuffleVector( + operand_a, + operand_b, + try o.builder.vectorValue(llvm_mask_ty, mask_elems), + "", ); - return self.wip.shuffleVector(a, b, llvm_mask_value, ""); } /// Reduce a vector by repeatedly applying `llvm_fn` to produce an accumulated result. 
diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig index 5041634a75..f83c6979ff 100644 --- a/src/codegen/spirv.zig +++ b/src/codegen/spirv.zig @@ -28,6 +28,15 @@ const SpvAssembler = @import("spirv/Assembler.zig"); const InstMap = std.AutoHashMapUnmanaged(Air.Inst.Index, IdRef); +pub fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features { + return comptime &.initMany(&.{ + .expand_intcast_safe, + .expand_add_safe, + .expand_sub_safe, + .expand_mul_safe, + }); +} + pub const zig_call_abi_ver = 3; pub const big_int_bits = 32; @@ -3243,7 +3252,8 @@ const NavGen = struct { .splat => try self.airSplat(inst), .reduce, .reduce_optimized => try self.airReduce(inst), - .shuffle => try self.airShuffle(inst), + .shuffle_one => try self.airShuffleOne(inst), + .shuffle_two => try self.airShuffleTwo(inst), .ptr_add => try self.airPtrAdd(inst), .ptr_sub => try self.airPtrSub(inst), @@ -3380,6 +3390,10 @@ const NavGen = struct { const zcu = self.pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + if (self.typeOf(bin_op.lhs).isVector(zcu) and !self.typeOf(bin_op.rhs).isVector(zcu)) { + return self.fail("vector shift with scalar rhs", .{}); + } + const base = try self.temporary(bin_op.lhs); const shift = try self.temporary(bin_op.rhs); @@ -3866,6 +3880,10 @@ const NavGen = struct { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + if (self.typeOf(extra.lhs).isVector(zcu) and !self.typeOf(extra.rhs).isVector(zcu)) { + return self.fail("vector shift with scalar rhs", .{}); + } + const base = try self.temporary(extra.lhs); const shift = try self.temporary(extra.rhs); @@ -4030,40 +4048,57 @@ const NavGen = struct { return result_id; } - fn airShuffle(self: *NavGen, inst: Air.Inst.Index) !?IdRef { - const pt = self.pt; + fn airShuffleOne(ng: *NavGen, inst: Air.Inst.Index) !?IdRef { + const pt = ng.pt; const zcu = pt.zcu; - const ty_pl = 
self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data; - const a = try self.resolve(extra.a); - const b = try self.resolve(extra.b); - const mask = Value.fromInterned(extra.mask); + const gpa = zcu.gpa; - // Note: number of components in the result, a, and b may differ. - const result_ty = self.typeOfIndex(inst); - const scalar_ty = result_ty.scalarType(zcu); - const scalar_ty_id = try self.resolveType(scalar_ty, .direct); + const unwrapped = ng.air.unwrapShuffleOne(zcu, inst); + const mask = unwrapped.mask; + const result_ty = unwrapped.result_ty; + const elem_ty = result_ty.childType(zcu); + const operand = try ng.resolve(unwrapped.operand); - const constituents = try self.gpa.alloc(IdRef, result_ty.vectorLen(zcu)); - defer self.gpa.free(constituents); + const constituents = try gpa.alloc(IdRef, mask.len); + defer gpa.free(constituents); - for (constituents, 0..) |*id, i| { - const elem = try mask.elemValue(pt, i); - if (elem.isUndef(zcu)) { - id.* = try self.spv.constUndef(scalar_ty_id); - continue; - } + for (constituents, mask) |*id, mask_elem| { + id.* = switch (mask_elem.unwrap()) { + .elem => |idx| try ng.extractVectorComponent(elem_ty, operand, idx), + .value => |val| try ng.constant(elem_ty, .fromInterned(val), .direct), + }; + } - const index = elem.toSignedInt(zcu); - if (index >= 0) { - id.* = try self.extractVectorComponent(scalar_ty, a, @intCast(index)); - } else { - id.* = try self.extractVectorComponent(scalar_ty, b, @intCast(~index)); - } + const result_ty_id = try ng.resolveType(result_ty, .direct); + return try ng.constructComposite(result_ty_id, constituents); + } + + fn airShuffleTwo(ng: *NavGen, inst: Air.Inst.Index) !?IdRef { + const pt = ng.pt; + const zcu = pt.zcu; + const gpa = zcu.gpa; + + const unwrapped = ng.air.unwrapShuffleTwo(zcu, inst); + const mask = unwrapped.mask; + const result_ty = unwrapped.result_ty; + const elem_ty = result_ty.childType(zcu); + 
const elem_ty_id = try ng.resolveType(elem_ty, .direct); + const operand_a = try ng.resolve(unwrapped.operand_a); + const operand_b = try ng.resolve(unwrapped.operand_b); + + const constituents = try gpa.alloc(IdRef, mask.len); + defer gpa.free(constituents); + + for (constituents, mask) |*id, mask_elem| { + id.* = switch (mask_elem.unwrap()) { + .a_elem => |idx| try ng.extractVectorComponent(elem_ty, operand_a, idx), + .b_elem => |idx| try ng.extractVectorComponent(elem_ty, operand_b, idx), + .undef => try ng.spv.constUndef(elem_ty_id), + }; } - const result_ty_id = try self.resolveType(result_ty, .direct); - return try self.constructComposite(result_ty_id, constituents); + const result_ty_id = try ng.resolveType(result_ty, .direct); + return try ng.constructComposite(result_ty_id, constituents); } fn indicesToIds(self: *NavGen, indices: []const u32) ![]IdRef { |
