diff options
| author | mlugg <mlugg@mlugg.co.uk> | 2025-05-26 05:07:13 +0100 |
|---|---|---|
| committer | mlugg <mlugg@mlugg.co.uk> | 2025-06-01 08:24:01 +0100 |
| commit | add2976a9ba76ec661ae5668eb2a8dca2ccfad42 (patch) | |
| tree | 54ea8377660395007c1ac5188863f5cbe3a3310e /src/Sema.zig | |
| parent | b48d6ff619de424e664cf11b43e2a03fecbca6ce (diff) | |
| download | zig-add2976a9ba76ec661ae5668eb2a8dca2ccfad42.tar.gz zig-add2976a9ba76ec661ae5668eb2a8dca2ccfad42.zip | |
compiler: implement better shuffle AIR
Runtime `@shuffle` has two cases which backends generally want to handle
differently for efficiency:
* One runtime vector operand; some result elements may be comptime-known
* Two runtime vector operands; some result elements may be undefined
The latter case happens if both vectors given to `@shuffle` are
runtime-known and they are both used (i.e. the mask refers to them).
Otherwise, if the result is not entirely comptime-known, we are in the
former case. `Sema` now diffentiates these two cases in the AIR so that
backends can easily handle them however they want to. Note that this
*doesn't* really involve Sema doing any more work than it would
otherwise need to, so there's not really a negative here!
Most existing backends have their lowerings for `@shuffle` migrated in
this commit. The LLVM backend uses new lowerings suggested by Jacob as
ones which it will handle effectively. The x86_64 backend has not yet
been migrated; for now there's a panic in there. Jacob will implement
that before this is merged anywhere.
Diffstat (limited to 'src/Sema.zig')
| -rw-r--r-- | src/Sema.zig | 268 |
1 files changed, 136 insertions, 132 deletions
diff --git a/src/Sema.zig b/src/Sema.zig index 34fb468716..3c4fc555cb 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -24256,8 +24256,8 @@ fn analyzeShuffle( block: *Block, src_node: std.zig.Ast.Node.Offset, elem_ty: Type, - a_arg: Air.Inst.Ref, - b_arg: Air.Inst.Ref, + a_uncoerced: Air.Inst.Ref, + b_uncoerced: Air.Inst.Ref, mask: Value, mask_len: u32, ) CompileError!Air.Inst.Ref { @@ -24266,150 +24266,154 @@ fn analyzeShuffle( const a_src = block.builtinCallArgSrc(src_node, 1); const b_src = block.builtinCallArgSrc(src_node, 2); const mask_src = block.builtinCallArgSrc(src_node, 3); - var a = a_arg; - var b = b_arg; - const res_ty = try pt.vectorType(.{ - .len = mask_len, - .child = elem_ty.toIntern(), - }); - - const maybe_a_len = switch (sema.typeOf(a).zigTypeTag(zcu)) { - .array, .vector => sema.typeOf(a).arrayLen(zcu), - .undefined => null, - else => return sema.fail(block, a_src, "expected vector or array with element type '{}', found '{}'", .{ - elem_ty.fmt(pt), - sema.typeOf(a).fmt(pt), - }), - }; - const maybe_b_len = switch (sema.typeOf(b).zigTypeTag(zcu)) { - .array, .vector => sema.typeOf(b).arrayLen(zcu), - .undefined => null, - else => return sema.fail(block, b_src, "expected vector or array with element type '{}', found '{}'", .{ - elem_ty.fmt(pt), - sema.typeOf(b).fmt(pt), - }), - }; - if (maybe_a_len == null and maybe_b_len == null) { - return pt.undefRef(res_ty); - } - const a_len: u32 = @intCast(maybe_a_len orelse maybe_b_len.?); - const b_len: u32 = @intCast(maybe_b_len orelse a_len); - - const a_ty = try pt.vectorType(.{ - .len = a_len, - .child = elem_ty.toIntern(), - }); - const b_ty = try pt.vectorType(.{ - .len = b_len, - .child = elem_ty.toIntern(), - }); - - if (maybe_a_len == null) a = try pt.undefRef(a_ty) else a = try sema.coerce(block, a_ty, a, a_src); - if (maybe_b_len == null) b = try pt.undefRef(b_ty) else b = try sema.coerce(block, b_ty, b, b_src); - - const operand_info = [2]std.meta.Tuple(&.{ u64, LazySrcLoc, Type }){ - .{ a_len, a_src, a_ty }, - .{ b_len, b_src, b_ty }, - }; - - for (0..@intCast(mask_len)) |i| { - const elem = try mask.elemValue(pt, i); - if (elem.isUndef(zcu)) continue; - const elem_resolved = try sema.resolveLazyValue(elem); - const int = elem_resolved.toSignedInt(zcu); - var unsigned: u32 = undefined; - var chosen: u32 = undefined; - if (int >= 0) { - unsigned = @intCast(int); - chosen = 0; - } else { - unsigned = @intCast(~int); - chosen = 1; + // If the type of `a` is `@Type(.undefined)`, i.e. the argument is untyped, this is 0, because it is an error to index into this vector. + const a_len: u32 = switch (sema.typeOf(a_uncoerced).zigTypeTag(zcu)) { + .array, .vector => @intCast(sema.typeOf(a_uncoerced).arrayLen(zcu)), + .undefined => 0, + else => return sema.fail(block, a_src, "expected vector of '{}', found '{}'", .{ elem_ty.fmt(pt), sema.typeOf(a_uncoerced).fmt(pt) }), + }; + const a_ty = try pt.vectorType(.{ .len = a_len, .child = elem_ty.toIntern() }); + const a_coerced = try sema.coerce(block, a_ty, a_uncoerced, a_src); + + // If the type of `b` is `@Type(.undefined)`, i.e. the argument is untyped, this is 0, because it is an error to index into this vector. + const b_len: u32 = switch (sema.typeOf(b_uncoerced).zigTypeTag(zcu)) { + .array, .vector => @intCast(sema.typeOf(b_uncoerced).arrayLen(zcu)), + .undefined => 0, + else => return sema.fail(block, b_src, "expected vector of '{}', found '{}'", .{ elem_ty.fmt(pt), sema.typeOf(b_uncoerced).fmt(pt) }), + }; + const b_ty = try pt.vectorType(.{ .len = b_len, .child = elem_ty.toIntern() }); + const b_coerced = try sema.coerce(block, b_ty, b_uncoerced, b_src); + + const result_ty = try pt.vectorType(.{ .len = mask_len, .child = elem_ty.toIntern() }); + + // We're going to pre-emptively reserve space in `sema.air_extra`. The reason for this is we need + // a `u32` buffer of length `mask_len` anyway, and putting it in `sema.air_extra` avoids a copy + // in the runtime case. If the result is comptime-known, we'll shrink `air_extra` back. + const air_extra_idx: u32 = @intCast(sema.air_extra.items.len); + const air_mask_buf = try sema.air_extra.addManyAsSlice(sema.gpa, mask_len); + + // We want to interpret that buffer in `air_extra` in a few ways. Initially, we'll consider its + // elements as `Air.Inst.ShuffleTwoMask`, essentially representing the raw mask values; then, we'll + // convert it to `InternPool.Index` or `Air.Inst.ShuffleOneMask` if there are comptime-known operands. + const mask_ip_index: []InternPool.Index = @ptrCast(air_mask_buf); + const mask_shuffle_one: []Air.ShuffleOneMask = @ptrCast(air_mask_buf); + const mask_shuffle_two: []Air.ShuffleTwoMask = @ptrCast(air_mask_buf); + + // Initial loop: check mask elements, populate `mask_shuffle_two`. + var a_used = false; + var b_used = false; + for (mask_shuffle_two, 0..mask_len) |*out, mask_idx| { + const mask_val = try mask.elemValue(pt, mask_idx); + if (mask_val.isUndef(zcu)) { + out.* = .undef; + continue; } - if (unsigned >= operand_info[chosen][0]) { - const msg = msg: { - const msg = try sema.errMsg(mask_src, "mask index '{d}' has out-of-bounds selection", .{i}); + // Safe because mask elements are `i32` and we already checked for undef: + const raw = (try sema.resolveLazyValue(mask_val)).toSignedInt(zcu); + if (raw >= 0) { + const idx: u32 = @intCast(raw); + a_used = true; + out.* = .aElem(idx); + if (idx >= a_len) return sema.failWithOwnedErrorMsg(block, msg: { + const msg = try sema.errMsg(mask_src, "mask element at index '{d}' selects out-of-bounds index", .{mask_idx}); errdefer msg.destroy(sema.gpa); - - try sema.errNote(operand_info[chosen][1], msg, "selected index '{d}' out of bounds of '{}'", .{ - unsigned, - operand_info[chosen][2].fmt(pt), - }); - - if (chosen == 0) { - try sema.errNote(b_src, msg, "selections from the second vector are specified with negative numbers", .{}); + try sema.errNote(a_src, msg, "index '{d}' exceeds bounds of '{}' given here", .{ idx, a_ty.fmt(pt) }); + if (idx < b_len) { + try sema.errNote(b_src, msg, "use '~@as(u32, {d})' to index into second vector given here", .{idx}); } - break :msg msg; - }; - return sema.failWithOwnedErrorMsg(block, msg); + }); + } else { + const idx: u32 = @intCast(~raw); + b_used = true; + out.* = .bElem(idx); + if (idx >= b_len) return sema.failWithOwnedErrorMsg(block, msg: { + const msg = try sema.errMsg(mask_src, "mask element at index '{d}' selects out-of-bounds index", .{mask_idx}); + errdefer msg.destroy(sema.gpa); + try sema.errNote(b_src, msg, "index '{d}' exceeds bounds of '{}' given here", .{ idx, b_ty.fmt(pt) }); + break :msg msg; + }); } } - if (try sema.resolveValue(a)) |a_val| { - if (try sema.resolveValue(b)) |b_val| { - const values = try sema.arena.alloc(InternPool.Index, mask_len); - for (values, 0..) |*value, i| { - const mask_elem_val = try mask.elemValue(pt, i); - if (mask_elem_val.isUndef(zcu)) { - value.* = try pt.intern(.{ .undef = elem_ty.toIntern() }); - continue; - } - const int = mask_elem_val.toSignedInt(zcu); - const unsigned: u32 = @intCast(if (int >= 0) int else ~int); - values[i] = (try (if (int >= 0) a_val else b_val).elemValue(pt, unsigned)).toIntern(); - } - return Air.internedToRef((try pt.intern(.{ .aggregate = .{ - .ty = res_ty.toIntern(), - .storage = .{ .elems = values }, - } }))); - } - } + const maybe_a_val = try sema.resolveValue(a_coerced); + const maybe_b_val = try sema.resolveValue(b_coerced); - // All static analysis passed, and not comptime. - // For runtime codegen, vectors a and b must be the same length. Here we - // recursively @shuffle the smaller vector to append undefined elements - // to it up to the length of the longer vector. This recursion terminates - // in 1 call because these calls to analyzeShuffle guarantee a_len == b_len. - if (a_len != b_len) { - const min_len = @min(a_len, b_len); - const max_src = if (a_len > b_len) a_src else b_src; - const max_len = try sema.usizeCast(block, max_src, @max(a_len, b_len)); + const a_rt = a_used and maybe_a_val == null; + const b_rt = b_used and maybe_b_val == null; - const expand_mask_values = try sema.arena.alloc(InternPool.Index, max_len); - for (@intCast(0)..@intCast(min_len)) |i| { - expand_mask_values[i] = (try pt.intValue(.comptime_int, i)).toIntern(); + if (a_rt and b_rt) { + // Both operands are needed and runtime-known. We need a `[]ShuffleTwomask`... which is + // exactly what we already have in `mask_shuffle_two`! So, we're basically done already. + // We just need to append the two operands. + try sema.air_extra.ensureUnusedCapacity(sema.gpa, 2); + sema.appendRefsAssumeCapacity(&.{ a_coerced, b_coerced }); + return block.addInst(.{ + .tag = .shuffle_two, + .data = .{ .ty_pl = .{ + .ty = Air.internedToRef(result_ty.toIntern()), + .payload = air_extra_idx, + } }, + }); + } else if (a_rt) { + // We need to convert the `ShuffleTwoMask` values to `ShuffleOneMask`. + for (mask_shuffle_two, mask_shuffle_one) |in, *out| { + out.* = switch (in.unwrap()) { + .undef => .value(try pt.undefValue(elem_ty)), + .a_elem => |idx| .elem(idx), + .b_elem => |idx| .value(try maybe_b_val.?.elemValue(pt, idx)), + }; } - for (@intCast(min_len)..@intCast(max_len)) |i| { - expand_mask_values[i] = .negative_one; + // Now just append our single runtime operand, and we're done. + try sema.air_extra.ensureUnusedCapacity(sema.gpa, 1); + sema.appendRefsAssumeCapacity(&.{a_coerced}); + return block.addInst(.{ + .tag = .shuffle_one, + .data = .{ .ty_pl = .{ + .ty = Air.internedToRef(result_ty.toIntern()), + .payload = air_extra_idx, + } }, + }); + } else if (b_rt) { + // We need to convert the `ShuffleTwoMask` values to `ShuffleOneMask`. + for (mask_shuffle_two, mask_shuffle_one) |in, *out| { + out.* = switch (in.unwrap()) { + .undef => .value(try pt.undefValue(elem_ty)), + .a_elem => |idx| .value(try maybe_a_val.?.elemValue(pt, idx)), + .b_elem => |idx| .elem(idx), + }; } - const expand_mask = try pt.intern(.{ .aggregate = .{ - .ty = (try pt.vectorType(.{ .len = @intCast(max_len), .child = .comptime_int_type })).toIntern(), - .storage = .{ .elems = expand_mask_values }, - } }); - - if (a_len < b_len) { - const undef = try pt.undefRef(a_ty); - a = try sema.analyzeShuffle(block, src_node, elem_ty, a, undef, Value.fromInterned(expand_mask), @intCast(max_len)); - } else { - const undef = try pt.undefRef(b_ty); - b = try sema.analyzeShuffle(block, src_node, elem_ty, b, undef, Value.fromInterned(expand_mask), @intCast(max_len)); + // Now just append our single runtime operand, and we're done. + try sema.air_extra.ensureUnusedCapacity(sema.gpa, 1); + sema.appendRefsAssumeCapacity(&.{b_coerced}); + return block.addInst(.{ + .tag = .shuffle_one, + .data = .{ .ty_pl = .{ + .ty = Air.internedToRef(result_ty.toIntern()), + .payload = air_extra_idx, + } }, + }); + } else { + // The result will be comptime-known. We must convert the `ShuffleTwoMask` values to + // `InternPool.Index` values using the known operands. + for (mask_shuffle_two, mask_ip_index) |in, *out| { + const val: Value = switch (in.unwrap()) { + .undef => try pt.undefValue(elem_ty), + .a_elem => |idx| try maybe_a_val.?.elemValue(pt, idx), + .b_elem => |idx| try maybe_b_val.?.elemValue(pt, idx), + }; + out.* = val.toIntern(); } + const res = try pt.intern(.{ .aggregate = .{ + .ty = result_ty.toIntern(), + .storage = .{ .elems = mask_ip_index }, + } }); + // We have a comptime-known result, so didn't need `air_mask_buf` -- remove it from `sema.air_extra`. + assert(sema.air_extra.items.len == air_extra_idx + air_mask_buf.len); + sema.air_extra.shrinkRetainingCapacity(air_extra_idx); + return Air.internedToRef(res); } - - return block.addInst(.{ - .tag = .shuffle, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try block.sema.addExtra(Air.Shuffle{ - .a = a, - .b = b, - .mask = mask.toIntern(), - .mask_len = mask_len, - }), - } }, - }); } fn zirSelect(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!Air.Inst.Ref { |
