diff options
| author | Jacob Young <jacobly0@users.noreply.github.com> | 2025-05-31 18:54:01 -0400 |
|---|---|---|
| committer | mlugg <mlugg@mlugg.co.uk> | 2025-06-01 08:24:01 +0100 |
| commit | ec579aa0f372b2054ad659aaacd190c1a986d7f2 (patch) | |
| tree | 193cca8db61e7885b94639e18319b4d98e586552 | |
| parent | add2976a9ba76ec661ae5668eb2a8dca2ccfad42 (diff) | |
| download | zig-ec579aa0f372b2054ad659aaacd190c1a986d7f2.tar.gz zig-ec579aa0f372b2054ad659aaacd190c1a986d7f2.zip | |
Legalize: implement scalarization of `@shuffle`
| -rw-r--r-- | lib/std/Target.zig | 27 | ||||
| -rw-r--r-- | lib/std/array_hash_map.zig | 17 | ||||
| -rw-r--r-- | lib/std/crypto/chacha20.zig | 9 | ||||
| -rw-r--r-- | lib/std/hash/xxhash.zig | 3 | ||||
| -rw-r--r-- | lib/std/simd.zig | 4 | ||||
| -rw-r--r-- | src/Air.zig | 4 | ||||
| -rw-r--r-- | src/Air/Legalize.zig | 350 | ||||
| -rw-r--r-- | src/arch/wasm/CodeGen.zig | 7 | ||||
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 37 | ||||
| -rw-r--r-- | test/behavior/shuffle.zig | 5 | ||||
| -rw-r--r-- | test/behavior/vector.zig | 3 |
11 files changed, 328 insertions, 138 deletions
diff --git a/lib/std/Target.zig b/lib/std/Target.zig index bf5a6369b5..b60de995fa 100644 --- a/lib/std/Target.zig +++ b/lib/std/Target.zig @@ -1246,11 +1246,7 @@ pub const Cpu = struct { /// Adds the specified feature set but not its dependencies. pub fn addFeatureSet(set: *Set, other_set: Set) void { - if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff) { - for (&set.ints, other_set.ints) |*set_int, other_set_int| set_int.* |= other_set_int; - } else { - set.ints = @as(@Vector(usize_count, usize), set.ints) | @as(@Vector(usize_count, usize), other_set.ints); - } + set.ints = @as(@Vector(usize_count, usize), set.ints) | @as(@Vector(usize_count, usize), other_set.ints); } /// Removes the specified feature but not its dependents. @@ -1262,11 +1258,7 @@ pub const Cpu = struct { /// Removes the specified feature but not its dependents. pub fn removeFeatureSet(set: *Set, other_set: Set) void { - if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff) { - for (&set.ints, other_set.ints) |*set_int, other_set_int| set_int.* &= ~other_set_int; - } else { - set.ints = @as(@Vector(usize_count, usize), set.ints) & ~@as(@Vector(usize_count, usize), other_set.ints); - } + set.ints = @as(@Vector(usize_count, usize), set.ints) & ~@as(@Vector(usize_count, usize), other_set.ints); } pub fn populateDependencies(set: *Set, all_features_list: []const Cpu.Feature) void { @@ -1295,17 +1287,10 @@ pub const Cpu = struct { } pub fn isSuperSetOf(set: Set, other_set: Set) bool { - if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff) { - var result = true; - for (&set.ints, other_set.ints) |*set_int, other_set_int| - result = result and (set_int.* & other_set_int) == other_set_int; - return result; - } else { - const V = @Vector(usize_count, usize); - const set_v: V = set.ints; - const other_v: V = other_set.ints; - return @reduce(.And, (set_v & other_v) == other_v); - } + const V = @Vector(usize_count, usize); + const 
set_v: V = set.ints; + const other_v: V = other_set.ints; + return @reduce(.And, (set_v & other_v) == other_v); } }; diff --git a/lib/std/array_hash_map.zig b/lib/std/array_hash_map.zig index b0b9e19169..ac9c70df8e 100644 --- a/lib/std/array_hash_map.zig +++ b/lib/std/array_hash_map.zig @@ -889,19 +889,10 @@ pub fn ArrayHashMapUnmanaged( self.pointer_stability.lock(); defer self.pointer_stability.unlock(); - if (new_capacity <= linear_scan_max) { - try self.entries.ensureTotalCapacity(gpa, new_capacity); - return; - } - - if (self.index_header) |header| { - if (new_capacity <= header.capacity()) { - try self.entries.ensureTotalCapacity(gpa, new_capacity); - return; - } - } - try self.entries.ensureTotalCapacity(gpa, new_capacity); + if (new_capacity <= linear_scan_max) return; + if (self.index_header) |header| if (new_capacity <= header.capacity()) return; + const new_bit_index = try IndexHeader.findBitIndex(new_capacity); const new_header = try IndexHeader.alloc(gpa, new_bit_index); @@ -2116,7 +2107,7 @@ const IndexHeader = struct { fn findBitIndex(desired_capacity: usize) Allocator.Error!u8 { if (desired_capacity > max_capacity) return error.OutOfMemory; - var new_bit_index = @as(u8, @intCast(std.math.log2_int_ceil(usize, desired_capacity))); + var new_bit_index: u8 = @intCast(std.math.log2_int_ceil(usize, desired_capacity)); if (desired_capacity > index_capacities[new_bit_index]) new_bit_index += 1; if (new_bit_index < min_bit_index) new_bit_index = min_bit_index; assert(desired_capacity <= index_capacities[new_bit_index]); diff --git a/lib/std/crypto/chacha20.zig b/lib/std/crypto/chacha20.zig index 564df2933f..495bad3efc 100644 --- a/lib/std/crypto/chacha20.zig +++ b/lib/std/crypto/chacha20.zig @@ -499,15 +499,12 @@ fn ChaChaNonVecImpl(comptime rounds_nb: usize) type { fn ChaChaImpl(comptime rounds_nb: usize) type { switch (builtin.cpu.arch) { .x86_64 => { - const has_avx2 = std.Target.x86.featureSetHas(builtin.cpu.features, .avx2); - const has_avx512f = 
std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f); - if (builtin.zig_backend != .stage2_x86_64 and has_avx512f) return ChaChaVecImpl(rounds_nb, 4); - if (has_avx2) return ChaChaVecImpl(rounds_nb, 2); + if (builtin.zig_backend != .stage2_x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f)) return ChaChaVecImpl(rounds_nb, 4); + if (std.Target.x86.featureSetHas(builtin.cpu.features, .avx2)) return ChaChaVecImpl(rounds_nb, 2); return ChaChaVecImpl(rounds_nb, 1); }, .aarch64 => { - const has_neon = std.Target.aarch64.featureSetHas(builtin.cpu.features, .neon); - if (has_neon) return ChaChaVecImpl(rounds_nb, 4); + if (builtin.zig_backend != .stage2_aarch64 and std.Target.aarch64.featureSetHas(builtin.cpu.features, .neon)) return ChaChaVecImpl(rounds_nb, 4); return ChaChaNonVecImpl(rounds_nb); }, else => return ChaChaNonVecImpl(rounds_nb), diff --git a/lib/std/hash/xxhash.zig b/lib/std/hash/xxhash.zig index f12f6341a4..b3128f39b2 100644 --- a/lib/std/hash/xxhash.zig +++ b/lib/std/hash/xxhash.zig @@ -780,7 +780,6 @@ fn testExpect(comptime H: type, seed: anytype, input: []const u8, expected: u64) } test "xxhash3" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807 const H = XxHash3; @@ -814,7 +813,6 @@ test "xxhash3" { } test "xxhash3 smhasher" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807 const Test = struct { @@ -828,7 +826,6 @@ test "xxhash3 smhasher" { } test "xxhash3 iterative api" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return 
error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807 const Test = struct { diff --git a/lib/std/simd.zig b/lib/std/simd.zig index cf4f7675fa..b2f8b7db5d 100644 --- a/lib/std/simd.zig +++ b/lib/std/simd.zig @@ -231,8 +231,6 @@ pub fn extract( } test "vector patterns" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const base = @Vector(4, u32){ 10, 20, 30, 40 }; const other_base = @Vector(4, u32){ 55, 66, 77, 88 }; @@ -302,8 +300,6 @@ pub fn reverseOrder(vec: anytype) @TypeOf(vec) { } test "vector shifting" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - const base = @Vector(4, u32){ 10, 20, 30, 40 }; try std.testing.expectEqual([4]u32{ 30, 40, 999, 999 }, shiftElementsLeft(base, 2, 999)); diff --git a/src/Air.zig b/src/Air.zig index ccfe9e9694..b315acecce 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -704,7 +704,7 @@ pub const Inst = struct { /// Uses the `ty_pl` field, where the payload index points to: /// 1. mask_elem: ShuffleOneMask // for each `mask_len`, which comes from `ty_pl.ty` /// 2. operand: Ref // guaranteed not to be an interned value - /// See `unwrapShufleOne`. + /// See `unwrapShuffleOne`. shuffle_one, /// Constructs a vector by selecting elements from two vectors based on a mask. Each mask /// element is either an index into one of the vectors, or "undef". @@ -712,7 +712,7 @@ pub const Inst = struct { /// 1. mask_elem: ShuffleOneMask // for each `mask_len`, which comes from `ty_pl.ty` /// 2. operand_a: Ref // guaranteed not to be an interned value /// 3. operand_b: Ref // guaranteed not to be an interned value - /// See `unwrapShufleTwo`. + /// See `unwrapShuffleTwo`. shuffle_two, /// Constructs a vector element-wise from `a` or `b` based on `pred`. /// Uses the `pl_op` field with `pred` as operand, and payload `Bin`. 
diff --git a/src/Air/Legalize.zig b/src/Air/Legalize.zig index b71725995a..4618bc4389 100644 --- a/src/Air/Legalize.zig +++ b/src/Air/Legalize.zig @@ -74,6 +74,8 @@ pub const Feature = enum { scalarize_int_from_float, scalarize_int_from_float_optimized, scalarize_float_from_int, + scalarize_shuffle_one, + scalarize_shuffle_two, scalarize_select, scalarize_mul_add, @@ -168,7 +170,9 @@ pub const Feature = enum { .int_from_float => .scalarize_int_from_float, .int_from_float_optimized => .scalarize_int_from_float_optimized, .float_from_int => .scalarize_float_from_int, - .select => .scalarize_select, + .shuffle_one => .scalarize_shuffle_one, + .shuffle_two => .scalarize_shuffle_two, + .select => .scalarize_select, .mul_add => .scalarize_mul_add, }; } @@ -521,11 +525,10 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { } }, .splat, - .shuffle_one, - .shuffle_two, => {}, - .select, - => if (l.features.contains(.scalarize_select)) continue :inst try l.scalarize(inst, .select_pl_op_bin), + .shuffle_one => if (l.features.contains(.scalarize_shuffle_one)) continue :inst try l.scalarize(inst, .shuffle_one), + .shuffle_two => if (l.features.contains(.scalarize_shuffle_two)) continue :inst try l.scalarize(inst, .shuffle_two), + .select => if (l.features.contains(.scalarize_select)) continue :inst try l.scalarize(inst, .select), .memset, .memset_safe, .memcpy, @@ -573,25 +576,26 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { } } -const ScalarizeDataTag = enum { un_op, ty_op, bin_op, ty_pl_vector_cmp, pl_op_bin, select_pl_op_bin }; -inline fn scalarize(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_tag: ScalarizeDataTag) Error!Air.Inst.Tag { - return l.replaceInst(orig_inst, .block, try l.scalarizeBlockPayload(orig_inst, data_tag)); +const ScalarizeForm = enum { un_op, ty_op, bin_op, ty_pl_vector_cmp, pl_op_bin, shuffle_one, shuffle_two, select }; +inline fn scalarize(l: *Legalize, orig_inst: 
Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Tag { + return l.replaceInst(orig_inst, .block, try l.scalarizeBlockPayload(orig_inst, form)); } -fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_tag: ScalarizeDataTag) Error!Air.Inst.Data { +fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Data { const pt = l.pt; const zcu = pt.zcu; const orig = l.air_instructions.get(@intFromEnum(orig_inst)); const res_ty = l.typeOfIndex(orig_inst); + const res_len = res_ty.vectorLen(zcu); - var inst_buf: [ - 5 + switch (data_tag) { - .un_op, .ty_op => 1, - .bin_op, .ty_pl_vector_cmp => 2, - .pl_op_bin => 3, - .select_pl_op_bin => 6, - } + 9 - ]Air.Inst.Index = undefined; + const extra_insts = switch (form) { + .un_op, .ty_op => 1, + .bin_op, .ty_pl_vector_cmp => 2, + .pl_op_bin => 3, + .shuffle_one, .shuffle_two => 13, + .select => 6, + }; + var inst_buf: [5 + extra_insts + 9]Air.Inst.Index = undefined; try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); var res_block: Block = .init(&inst_buf); @@ -628,7 +632,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_ .vector_ptr = res_alloc_inst.toRef(), .payload = try l.addExtra(Air.Bin, .{ .lhs = cur_index_inst.toRef(), - .rhs = res_elem: switch (data_tag) { + .rhs = res_elem: switch (form) { .un_op => loop.block.add(l, .{ .tag = orig.tag, .data = .{ .un_op = loop.block.add(l, .{ @@ -638,7 +642,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_ .rhs = cur_index_inst.toRef(), } }, }).toRef() }, - }), + }).toRef(), .ty_op => loop.block.add(l, .{ .tag = orig.tag, .data = .{ .ty_op = .{ @@ -651,7 +655,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_ } }, }).toRef(), } }, - }), + }).toRef(), .bin_op => loop.block.add(l, .{ .tag = orig.tag, .data = .{ .bin_op = .{ @@ -670,10 +674,10 @@ fn scalarizeBlockPayload(l: 
*Legalize, orig_inst: Air.Inst.Index, comptime data_ } }, }).toRef(), } }, - }), + }).toRef(), .ty_pl_vector_cmp => { const extra = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data; - break :res_elem try loop.block.addCmp( + break :res_elem (try loop.block.addCmp( l, extra.compareOperator(), loop.block.add(l, .{ @@ -695,7 +699,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_ .cmp_vector => false, .cmp_vector_optimized => true, } }, - ); + )).toRef(); }, .pl_op_bin => { const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data; @@ -726,58 +730,265 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_ } }, }).toRef(), } }, - }); + }).toRef(); }, - .select_pl_op_bin => { - const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data; - var res_elem: Result = .init(l, l.typeOf(extra.lhs).scalarType(zcu), &loop.block); - res_elem.block = .init(loop.block.stealCapacity(6)); + .shuffle_one, .shuffle_two => { + const ip = &zcu.intern_pool; + const unwrapped = switch (form) { + else => comptime unreachable, + .shuffle_one => l.getTmpAir().unwrapShuffleOne(zcu, orig_inst), + .shuffle_two => l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst), + }; + const operand_a = switch (form) { + else => comptime unreachable, + .shuffle_one => unwrapped.operand, + .shuffle_two => unwrapped.operand_a, + }; + const operand_a_len = l.typeOf(operand_a).vectorLen(zcu); + const elem_ty = unwrapped.result_ty.scalarType(zcu); + var res_elem: Result = .init(l, elem_ty, &loop.block); + res_elem.block = .init(loop.block.stealCapacity(extra_insts)); { - var select_cond_br: CondBr = .init(l, res_elem.block.add(l, .{ - .tag = .array_elem_val, + const ExpectedContents = extern struct { + mask_elems: [128]InternPool.Index, + ct_elems: switch (form) { + else => unreachable, + .shuffle_one => extern struct { + keys: [152]InternPool.Index, + header: u8 align(@alignOf(u32)), + index: [256][2]u8, + }, + .shuffle_two => void, + 
}, + }; + var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = + std.heap.stackFallback(@sizeOf(ExpectedContents), zcu.gpa); + const gpa = stack.get(); + + const mask_elems = try gpa.alloc(InternPool.Index, res_len); + defer gpa.free(mask_elems); + + var ct_elems: switch (form) { + else => unreachable, + .shuffle_one => std.AutoArrayHashMapUnmanaged(InternPool.Index, void), + .shuffle_two => struct { + const empty: @This() = .{}; + inline fn deinit(_: @This(), _: std.mem.Allocator) void {} + inline fn ensureTotalCapacity(_: @This(), _: std.mem.Allocator, _: usize) error{}!void {} + }, + } = .empty; + defer ct_elems.deinit(gpa); + try ct_elems.ensureTotalCapacity(gpa, res_len); + + const mask_elem_ty = try pt.intType(.signed, 1 + Type.smallestUnsignedBits(@max(operand_a_len, switch (form) { + else => comptime unreachable, + .shuffle_one => res_len, + .shuffle_two => l.typeOf(unwrapped.operand_b).vectorLen(zcu), + }))); + for (mask_elems, unwrapped.mask) |*mask_elem_val, mask_elem| mask_elem_val.* = (try pt.intValue(mask_elem_ty, switch (form) { + else => comptime unreachable, + .shuffle_one => switch (mask_elem.unwrap()) { + .elem => |index| index, + .value => |elem_val| if (ip.isUndef(elem_val)) + operand_a_len + else + ~@as(i33, @intCast((ct_elems.getOrPutAssumeCapacity(elem_val)).index)), + }, + .shuffle_two => switch (mask_elem.unwrap()) { + .a_elem => |a_index| a_index, + .b_elem => |b_index| ~@as(i33, b_index), + .undef => operand_a_len, + }, + })).toIntern(); + const mask_ty = try pt.arrayType(.{ + .len = res_len, + .child = mask_elem_ty.toIntern(), + }); + const mask_elem_inst = res_elem.block.add(l, .{ + .tag = .ptr_elem_val, .data = .{ .bin_op = .{ - .lhs = orig.data.pl_op.operand, + .lhs = Air.internedToRef(try pt.intern(.{ .ptr = .{ + .ty = (try pt.manyConstPtrType(mask_elem_ty)).toIntern(), + .base_addr = .{ .uav = .{ + .val = try pt.intern(.{ .aggregate = .{ + .ty = mask_ty.toIntern(), + .storage = .{ .elems 
= mask_elems }, + } }), + .orig_ty = (try pt.singleConstPtrType(mask_ty)).toIntern(), + } }, + .byte_offset = 0, + } })), .rhs = cur_index_inst.toRef(), } }, - }).toRef(), &res_elem.block, .{}); - select_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity()); + }); + var def_cond_br: CondBr = .init(l, (try res_elem.block.addCmp( + l, + .lt, + mask_elem_inst.toRef(), + try pt.intRef(mask_elem_ty, operand_a_len), + .{}, + )).toRef(), &res_elem.block, .{}); + def_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity()); { - _ = select_cond_br.then_block.add(l, .{ + const operand_b_used = switch (form) { + else => comptime unreachable, + .shuffle_one => ct_elems.count() > 0, + .shuffle_two => true, + }; + var operand_cond_br: CondBr = undefined; + operand_cond_br.then_block = if (operand_b_used) then_block: { + operand_cond_br = .init(l, (try def_cond_br.then_block.addCmp( + l, + .gte, + mask_elem_inst.toRef(), + try pt.intRef(mask_elem_ty, 0), + .{}, + )).toRef(), &def_cond_br.then_block, .{}); + break :then_block .init(def_cond_br.then_block.stealRemainingCapacity()); + } else def_cond_br.then_block; + _ = operand_cond_br.then_block.add(l, .{ .tag = .br, .data = .{ .br = .{ .block_inst = res_elem.inst, - .operand = select_cond_br.then_block.add(l, .{ + .operand = operand_cond_br.then_block.add(l, .{ .tag = .array_elem_val, .data = .{ .bin_op = .{ - .lhs = extra.lhs, - .rhs = cur_index_inst.toRef(), + .lhs = operand_a, + .rhs = operand_cond_br.then_block.add(l, .{ + .tag = .intcast, + .data = .{ .ty_op = .{ + .ty = .usize_type, + .operand = mask_elem_inst.toRef(), + } }, + }).toRef(), } }, }).toRef(), } }, }); + if (operand_b_used) { + operand_cond_br.else_block = .init(operand_cond_br.then_block.stealRemainingCapacity()); + _ = operand_cond_br.else_block.add(l, .{ + .tag = .br, + .data = .{ .br = .{ + .block_inst = res_elem.inst, + .operand = if (switch (form) { + else => comptime unreachable, + .shuffle_one => ct_elems.count() > 1, + 
.shuffle_two => true, + }) operand_cond_br.else_block.add(l, .{ + .tag = switch (form) { + else => comptime unreachable, + .shuffle_one => .ptr_elem_val, + .shuffle_two => .array_elem_val, + }, + .data = .{ .bin_op = .{ + .lhs = operand_b: switch (form) { + else => comptime unreachable, + .shuffle_one => { + const ct_elems_ty = try pt.arrayType(.{ + .len = ct_elems.count(), + .child = elem_ty.toIntern(), + }); + break :operand_b Air.internedToRef(try pt.intern(.{ .ptr = .{ + .ty = (try pt.manyConstPtrType(elem_ty)).toIntern(), + .base_addr = .{ .uav = .{ + .val = try pt.intern(.{ .aggregate = .{ + .ty = ct_elems_ty.toIntern(), + .storage = .{ .elems = ct_elems.keys() }, + } }), + .orig_ty = (try pt.singleConstPtrType(ct_elems_ty)).toIntern(), + } }, + .byte_offset = 0, + } })); + }, + .shuffle_two => unwrapped.operand_b, + }, + .rhs = operand_cond_br.else_block.add(l, .{ + .tag = .intcast, + .data = .{ .ty_op = .{ + .ty = .usize_type, + .operand = operand_cond_br.else_block.add(l, .{ + .tag = .not, + .data = .{ .ty_op = .{ + .ty = Air.internedToRef(mask_elem_ty.toIntern()), + .operand = mask_elem_inst.toRef(), + } }, + }).toRef(), + } }, + }).toRef(), + } }, + }).toRef() else res_elem_br: { + _ = operand_cond_br.else_block.stealCapacity(3); + break :res_elem_br Air.internedToRef(ct_elems.keys()[0]); + }, + } }, + }); + def_cond_br.else_block = .init(operand_cond_br.else_block.stealRemainingCapacity()); + try operand_cond_br.finish(l); + } else { + def_cond_br.then_block = operand_cond_br.then_block; + _ = def_cond_br.then_block.stealCapacity(6); + def_cond_br.else_block = .init(def_cond_br.then_block.stealRemainingCapacity()); + } } + _ = def_cond_br.else_block.add(l, .{ + .tag = .br, + .data = .{ .br = .{ + .block_inst = res_elem.inst, + .operand = try pt.undefRef(elem_ty), + } }, + }); + try def_cond_br.finish(l); + } + try res_elem.finish(l); + break :res_elem res_elem.inst.toRef(); + }, + .select => { + const extra = l.extraData(Air.Bin, 
orig.data.pl_op.payload).data; + var res_elem: Result = .init(l, l.typeOf(extra.lhs).scalarType(zcu), &loop.block); + res_elem.block = .init(loop.block.stealCapacity(extra_insts)); + { + var select_cond_br: CondBr = .init(l, res_elem.block.add(l, .{ + .tag = .array_elem_val, + .data = .{ .bin_op = .{ + .lhs = orig.data.pl_op.operand, + .rhs = cur_index_inst.toRef(), + } }, + }).toRef(), &res_elem.block, .{}); + select_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity()); + _ = select_cond_br.then_block.add(l, .{ + .tag = .br, + .data = .{ .br = .{ + .block_inst = res_elem.inst, + .operand = select_cond_br.then_block.add(l, .{ + .tag = .array_elem_val, + .data = .{ .bin_op = .{ + .lhs = extra.lhs, + .rhs = cur_index_inst.toRef(), + } }, + }).toRef(), + } }, + }); select_cond_br.else_block = .init(select_cond_br.then_block.stealRemainingCapacity()); - { - _ = select_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = select_cond_br.else_block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }); - } + _ = select_cond_br.else_block.add(l, .{ + .tag = .br, + .data = .{ .br = .{ + .block_inst = res_elem.inst, + .operand = select_cond_br.else_block.add(l, .{ + .tag = .array_elem_val, + .data = .{ .bin_op = .{ + .lhs = extra.rhs, + .rhs = cur_index_inst.toRef(), + } }, + }).toRef(), + } }, + }); try select_cond_br.finish(l); } try res_elem.finish(l); - break :res_elem res_elem.inst; + break :res_elem res_elem.inst.toRef(); }, - }.toRef(), + }, }), } }, }); @@ -786,7 +997,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_ l, .lt, cur_index_inst.toRef(), - try pt.intRef(.usize, res_ty.vectorLen(zcu) - 1), + try pt.intRef(.usize, res_len - 1), .{}, )).toRef(), &loop.block, .{}); loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); @@ -810,21 +1021,19 @@ fn 
scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_ }); } loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - { - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = loop_cond_br.else_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .operand = res_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); - } + _ = loop_cond_br.else_block.add(l, .{ + .tag = .br, + .data = .{ .br = .{ + .block_inst = orig_inst, + .operand = loop_cond_br.else_block.add(l, .{ + .tag = .load, + .data = .{ .ty_op = .{ + .ty = Air.internedToRef(res_ty.toIntern()), + .operand = res_alloc_inst.toRef(), + } }, + }).toRef(), + } }, + }); try loop_cond_br.finish(l); } try loop.finish(l); @@ -1337,6 +1546,7 @@ inline fn replaceInst(l: *Legalize, inst: Air.Inst.Index, tag: Air.Inst.Tag, dat const Air = @import("../Air.zig"); const assert = std.debug.assert; const dev = @import("../dev.zig"); +const InternPool = @import("../InternPool.zig"); const Legalize = @This(); const std = @import("std"); const Type = @import("../Type.zig"); diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index ebc46179c3..55a61088d0 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -5195,6 +5195,8 @@ fn airShuffleOne(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { // TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible. // I tried to fix it, but I couldn't make much sense of how this backend handles memory. + if (!isByRef(result_ty, zcu, cg.target) or + !isByRef(cg.typeOf(unwrapped.operand), zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{}); const dest_alloc = try cg.allocStack(result_ty); for (mask, 0..) 
|mask_elem, out_idx| { @@ -5232,7 +5234,7 @@ fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { elem_ty.bitSize(zcu) % 8 == 0) { var lane_map: [16]u8 align(4) = undefined; - const lanes_per_elem = elem_ty.bitSize(zcu) / 8; + const lanes_per_elem: usize = @intCast(elem_ty.bitSize(zcu) / 8); for (mask, 0..) |mask_elem, out_idx| { const out_first_lane = out_idx * lanes_per_elem; const in_first_lane = switch (mask_elem.unwrap()) { @@ -5260,6 +5262,9 @@ fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { // TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible. // I tried to fix it, but I couldn't make much sense of how this backend handles memory. + if (!isByRef(result_ty, zcu, cg.target) or + !isByRef(a_ty, zcu, cg.target) or + !isByRef(b_ty, zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{}); const dest_alloc = try cg.allocStack(result_ty); for (mask, 0..) |mask_elem, out_idx| { diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index f6d8d61adc..4c12e72cfa 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -53,11 +53,14 @@ pub fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Features .scalarize_div_exact_optimized = use_old, .scalarize_max = use_old, .scalarize_min = use_old, + .scalarize_bit_and = use_old, + .scalarize_bit_or = use_old, .scalarize_shr = true, .scalarize_shr_exact = true, .scalarize_shl = true, .scalarize_shl_exact = true, .scalarize_shl_sat = true, + .scalarize_xor = use_old, .scalarize_not = use_old, .scalarize_clz = use_old, .scalarize_ctz = true, @@ -84,6 +87,8 @@ pub fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Features .scalarize_int_from_float = use_old, .scalarize_int_from_float_optimized = use_old, .scalarize_float_from_int = use_old, + .scalarize_shuffle_one = true, + .scalarize_shuffle_two = true, .scalarize_select = true, .scalarize_mul_add = use_old, @@ 
-2299,11 +2304,17 @@ fn gen(self: *CodeGen) InnerError!void { try self.genBody(self.air.getMainBody()); const epilogue = if (self.epilogue_relocs.items.len > 0) epilogue: { - const epilogue_relocs_last_index = self.epilogue_relocs.items.len - 1; - for (if (self.epilogue_relocs.items[epilogue_relocs_last_index] == self.mir_instructions.len - 1) epilogue_relocs: { - _ = self.mir_instructions.pop(); - break :epilogue_relocs self.epilogue_relocs.items[0..epilogue_relocs_last_index]; - } else self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc); + var last_inst: Mir.Inst.Index = @intCast(self.mir_instructions.len - 1); + while (self.epilogue_relocs.getLastOrNull() == last_inst) { + self.epilogue_relocs.items.len -= 1; + self.mir_instructions.set(last_inst, .{ + .tag = .pseudo, + .ops = .pseudo_dead_none, + .data = undefined, + }); + last_inst -= 1; + } + for (self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc); if (self.debug_output != .none) try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); const backpatch_stack_dealloc = try self.asmPlaceholder(); @@ -174143,17 +174154,23 @@ fn lowerBlock(self: *CodeGen, inst: Air.Inst.Index, body: []const Air.Inst.Index var block_data = self.blocks.fetchRemove(inst).?; defer block_data.value.deinit(self.gpa); if (block_data.value.relocs.items.len > 0) { + var last_inst: Mir.Inst.Index = @intCast(self.mir_instructions.len - 1); + while (block_data.value.relocs.getLastOrNull() == last_inst) { + block_data.value.relocs.items.len -= 1; + self.mir_instructions.set(last_inst, .{ + .tag = .pseudo, + .ops = .pseudo_dead_none, + .data = undefined, + }); + last_inst -= 1; + } + for (block_data.value.relocs.items) |block_reloc| self.performReloc(block_reloc); try self.restoreState(block_data.value.state, liveness.deaths, .{ .emit_instructions = false, .update_tracking = true, .resurrect = true, .close_scope = true, }); - const block_relocs_last_index = block_data.value.relocs.items.len - 
1; - for (if (block_data.value.relocs.items[block_relocs_last_index] == self.mir_instructions.len - 1) block_relocs: { - _ = self.mir_instructions.pop(); - break :block_relocs block_data.value.relocs.items[0..block_relocs_last_index]; - } else block_data.value.relocs.items) |block_reloc| self.performReloc(block_reloc); } if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? == inst_tracking_i); diff --git a/test/behavior/shuffle.zig b/test/behavior/shuffle.zig index 125a0ddf7a..12e613d3d5 100644 --- a/test/behavior/shuffle.zig +++ b/test/behavior/shuffle.zig @@ -10,8 +10,6 @@ test "@shuffle int" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and - !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)) return error.SkipZigTest; const S = struct { fn doTheTest() !void { @@ -53,7 +51,6 @@ test "@shuffle int" { test "@shuffle int strange sizes" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -136,7 +133,6 @@ fn testShuffle( test "@shuffle bool 1" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -160,7 +156,6 @@ test "@shuffle bool 1" { test "@shuffle bool 
2" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 168e2f9db0..b22693e388 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -906,8 +906,6 @@ test "mask parameter of @shuffle is comptime scope" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and - !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)) return error.SkipZigTest; const __v4hi = @Vector(4, i16); var v4_a = __v4hi{ 1, 2, 3, 4 }; @@ -1357,7 +1355,6 @@ test "array operands to shuffle are coerced to vectors" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; const mask = [5]i32{ -1, 0, 1, 2, 3 }; |
