aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRobin Voetter <robin@voetter.nl>2024-04-01 09:51:04 +0200
committerGitHub <noreply@github.com>2024-04-01 09:51:04 +0200
commitd2be725e4b14c33dbd39054e33d926913eee3cd4 (patch)
treed9bf0cde23ca8553192f0cc8bc77762ce0eaf1b4 /src
parent3cb987f5a575bc5871459805a86640ba1a2d7cae (diff)
parent27b91288dc3c0442b645e06cae75c076600bdfd3 (diff)
downloadzig-d2be725e4b14c33dbd39054e33d926913eee3cd4.tar.gz
zig-d2be725e4b14c33dbd39054e33d926913eee3cd4.zip
Merge pull request #19490 from Snektron/spirv-dedup
spirv: deduplication pass
Diffstat (limited to 'src')
-rw-r--r--src/codegen/spirv.zig80
-rw-r--r--src/link/SpirV.zig2
-rw-r--r--src/link/SpirV/BinaryModule.zig2
-rw-r--r--src/link/SpirV/deduplicate.zig482
4 files changed, 566 insertions, 0 deletions
diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig
index 6b13f2623a..9113d72d92 100644
--- a/src/codegen/spirv.zig
+++ b/src/codegen/spirv.zig
@@ -2332,6 +2332,9 @@ const DeclGen = struct {
.mul_add => try self.airMulAdd(inst),
+ .ctz => try self.airClzCtz(inst, .ctz),
+ .clz => try self.airClzCtz(inst, .clz),
+
.splat => try self.airSplat(inst),
.reduce, .reduce_optimized => try self.airReduce(inst),
.shuffle => try self.airShuffle(inst),
@@ -3029,6 +3032,83 @@ const DeclGen = struct {
return try wip.finalize();
}
+    /// Lowers the AIR `clz` and `ctz` instructions, one element at a time.
+    /// Returns null when the instruction's result is unused.
+    ///
+    /// Only integer and strange-integer operands on the OpenCL target are
+    /// handled; composite integers and the Vulkan target are still TODO and
+    /// are marked `unreachable`.
+    fn airClzCtz(self: *DeclGen, inst: Air.Inst.Index, op: enum { clz, ctz }) !?IdRef {
+        if (self.liveness.isUnused(inst)) return null;
+
+        const mod = self.module;
+        const target = self.getTarget();
+        const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
+        const result_ty = self.typeOfIndex(inst);
+        const operand_ty = self.typeOf(ty_op.operand);
+        const operand = try self.resolve(ty_op.operand);
+
+        switch (self.arithmeticTypeInfo(operand_ty).class) {
+            .integer, .strange_integer => {},
+            .composite_integer => unreachable, // TODO
+            .float, .bool => unreachable,
+        }
+
+        var wip = try self.elementWise(result_ty, false);
+        defer wip.deinit();
+
+        // The extended instruction operates on (and yields) the operand's element type.
+        const elem_ty = if (wip.is_array) operand_ty.scalarType(mod) else operand_ty;
+        const elem_ty_id = self.typeId(try self.resolveType(elem_ty, .direct));
+
+        for (wip.results, 0..) |*result_id, i| {
+            const elem = try wip.elementAt(operand_ty, operand, i);
+
+            switch (target.os.tag) {
+                .opencl => {
+                    const ext_set = try self.spv.importInstructionSet(.@"OpenCL.std");
+                    // OpenCL.std extended instruction numbers: 151 is clz, 152 is ctz.
+                    const ext_opcode: u32 = if (op == .clz) 151 else 152;
+
+                    // The OpenCL instruction produces a value of the operand type,
+                    // while the caller expects result_ty. result_ty can always hold
+                    // the count, so a conversion may be required afterwards.
+                    const raw_id = self.spv.allocId();
+                    try self.func.body.emit(self.spv.gpa, .OpExtInst, .{
+                        .id_result_type = elem_ty_id,
+                        .id_result = raw_id,
+                        .set = ext_set,
+                        .instruction = .{ .inst = ext_opcode },
+                        .id_ref_4 = &.{elem},
+                    });
+
+                    if (wip.ty_id != elem_ty_id) {
+                        // Types differ: convert, keeping the signedness of the result.
+                        result_id.* = self.spv.allocId();
+                        const result_is_signed = result_ty.scalarType(mod).isSignedInt(mod);
+                        if (result_is_signed) {
+                            assert(elem_ty.scalarType(mod).isSignedInt(mod));
+                            try self.func.body.emit(self.spv.gpa, .OpSConvert, .{
+                                .id_result_type = wip.ty_id,
+                                .id_result = result_id.*,
+                                .signed_value = raw_id,
+                            });
+                        } else {
+                            assert(elem_ty.scalarType(mod).isUnsignedInt(mod));
+                            try self.func.body.emit(self.spv.gpa, .OpUConvert, .{
+                                .id_result_type = wip.ty_id,
+                                .id_result = result_id.*,
+                                .unsigned_value = raw_id,
+                            });
+                        }
+                    } else {
+                        // Same type: the extended instruction's result is used directly.
+                        result_id.* = raw_id;
+                    }
+                },
+                .vulkan => unreachable, // TODO
+                else => unreachable,
+            }
+        }
+
+        return try wip.finalize();
+    }
+
fn airSplat(self: *DeclGen, inst: Air.Inst.Index) !?IdRef {
const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
const operand_id = try self.resolve(ty_op.operand);
diff --git a/src/link/SpirV.zig b/src/link/SpirV.zig
index dc25ac5105..728db2d848 100644
--- a/src/link/SpirV.zig
+++ b/src/link/SpirV.zig
@@ -261,6 +261,7 @@ fn linkModule(self: *SpirV, a: Allocator, module: []Word) ![]Word {
const lower_invocation_globals = @import("SpirV/lower_invocation_globals.zig");
const prune_unused = @import("SpirV/prune_unused.zig");
+ const dedup = @import("SpirV/deduplicate.zig");
var parser = try BinaryModule.Parser.init(a);
defer parser.deinit();
@@ -268,6 +269,7 @@ fn linkModule(self: *SpirV, a: Allocator, module: []Word) ![]Word {
try lower_invocation_globals.run(&parser, &binary);
try prune_unused.run(&parser, &binary);
+ try dedup.run(&parser, &binary);
return binary.finalize(a);
}
diff --git a/src/link/SpirV/BinaryModule.zig b/src/link/SpirV/BinaryModule.zig
index 0c9c32c98e..e150890315 100644
--- a/src/link/SpirV/BinaryModule.zig
+++ b/src/link/SpirV/BinaryModule.zig
@@ -94,6 +94,8 @@ pub const ParseError = error{
DuplicateId,
/// Some ID did not resolve.
InvalidId,
+ /// This opcode or instruction is not supported yet.
+ UnsupportedOperation,
/// Parser ran out of memory.
OutOfMemory,
};
diff --git a/src/link/SpirV/deduplicate.zig b/src/link/SpirV/deduplicate.zig
new file mode 100644
index 0000000000..4a73276a9a
--- /dev/null
+++ b/src/link/SpirV/deduplicate.zig
@@ -0,0 +1,482 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const log = std.log.scoped(.spirv_link);
+const assert = std.debug.assert;
+
+const BinaryModule = @import("BinaryModule.zig");
+const Section = @import("../../codegen/spirv/Section.zig");
+const spec = @import("../../codegen/spirv/spec.zig");
+const Opcode = spec.Opcode;
+const ResultId = spec.IdResult;
+const Word = spec.Word;
+
+/// Returns whether instructions with this opcode take part in deduplication.
+///
+/// Type declarations, constant creations and annotations qualify, as do the
+/// decoration-like debug instructions `OpName` and `OpMemberName`.
+fn canDeduplicate(opcode: Opcode) bool {
+    switch (opcode) {
+        // Forward pointers don't need to be handled. The group decorations
+        // are deprecated, so they are not supported for now.
+        .OpTypeForwardPointer,
+        .OpGroupDecorate,
+        .OpGroupMemberDecorate,
+        => return false,
+        // Debug decoration-style instructions
+        .OpName, .OpMemberName => return true,
+        else => {},
+    }
+
+    // Everything else is decided by the instruction class alone.
+    return switch (opcode.class()) {
+        .TypeDeclaration, .ConstantCreation, .Annotation => true,
+        else => false,
+    };
+}
+
+/// Summary of the parts of a binary module that the deduplication pass
+/// works on: the deduplicatable types and constants, the decorations and
+/// debug names attached to them, and a per-word "is this an id" bit set.
+const ModuleInfo = struct {
+ /// This models a type, decoration or constant instruction
+ /// and its dependencies.
+ const Entity = struct {
+ /// The type that this entity represents. This is just
+ /// the instruction opcode.
+ kind: Opcode,
+ /// The offset of this entity's operands, in
+ /// `binary.instructions`.
+ first_operand: u32,
+ /// The number of operands in this entity
+ num_operands: u16,
+ /// The (first_operand-relative) offset of the result-id,
+ /// or the entity that is affected by this entity if this entity
+ /// is a decoration.
+ /// `parse` sets this to 0 for types, annotations and debug names,
+ /// and to 1 for constants.
+ result_id_index: u16,
+ /// The first decoration in `self.decorations`.
+ /// The run of decorations ends where the next entity's run begins
+ /// (see `entityDecorationsByIndex`). Only filled in for entries of
+ /// `entities`; entries of `decorations` keep this `undefined`.
+ first_decoration: u32,
+ };
+
+ /// Maps result-id to Entity's
+ entities: std.AutoArrayHashMapUnmanaged(ResultId, Entity),
+ /// A bit set that keeps track of which operands are result-ids.
+ /// Note: This also includes any result-id!
+ /// Because we need these values when recoding the module anyway,
+ /// it contains the status of ALL operands in the module.
+ operand_is_id: std.DynamicBitSetUnmanaged,
+ /// Store of decorations for each entity.
+ /// Sorted so that the decorations of one entity are contiguous and the
+ /// runs appear in the same order as `entities`.
+ decorations: []const Entity,
+
+ /// Walks every instruction of `binary` and builds a `ModuleInfo`.
+ ///
+ /// Every id operand is recorded in `operand_is_id`. Deduplicatable types
+ /// and constants are stored in `entities`; deduplicatable annotations and
+ /// debug names are stored in `decorations`. All memory is allocated from
+ /// `arena` and is never freed individually.
+ ///
+ /// Returns `error.DuplicateId` when a type or constant result-id is
+ /// defined more than once; parser and allocation errors are propagated.
+ pub fn parse(
+ arena: Allocator,
+ parser: *BinaryModule.Parser,
+ binary: BinaryModule,
+ ) !ModuleInfo {
+ var entities = std.AutoArrayHashMap(ResultId, Entity).init(arena);
+ var id_offsets = std.ArrayList(u16).init(arena);
+ var operand_is_id = try std.DynamicBitSetUnmanaged.initEmpty(arena, binary.instructions.len);
+ var decorations = std.MultiArrayList(struct { target_id: ResultId, entity: Entity }){};
+
+ var it = binary.iterateInstructions();
+ while (it.next()) |inst| {
+ // Reuse the offsets buffer from the previous instruction.
+ id_offsets.items.len = 0;
+ try parser.parseInstructionResultIds(binary, inst, &id_offsets);
+
+ // Operands start one word after the instruction's own offset.
+ const first_operand_offset: u32 = @intCast(inst.offset + 1);
+ for (id_offsets.items) |offset| {
+ operand_is_id.set(first_operand_offset + offset);
+ }
+
+ if (!canDeduplicate(inst.opcode)) continue;
+
+ // Position, among the instruction's id operands, of the id that
+ // identifies this entity: the result-id, or the target of a decoration.
+ const result_id_index: u16 = switch (inst.opcode.class()) {
+ .TypeDeclaration, .Annotation, .Debug => 0,
+ .ConstantCreation => 1,
+ else => unreachable,
+ };
+
+ const result_id: ResultId = @enumFromInt(inst.operands[id_offsets.items[result_id_index]]);
+ const entity = Entity{
+ .kind = inst.opcode,
+ .first_operand = first_operand_offset,
+ .num_operands = @intCast(inst.operands.len),
+ .result_id_index = result_id_index,
+ .first_decoration = undefined, // Filled in later
+ };
+
+ switch (inst.opcode.class()) {
+ .Annotation, .Debug => {
+ // Decoration-style instruction: remember it under its target id.
+ try decorations.append(arena, .{
+ .target_id = result_id,
+ .entity = entity,
+ });
+ },
+ .TypeDeclaration, .ConstantCreation => {
+ const entry = try entities.getOrPut(result_id);
+ if (entry.found_existing) {
+ log.err("type or constant {} has duplicate definition", .{result_id});
+ return error.DuplicateId;
+ }
+ entry.value_ptr.* = entity;
+ },
+ else => unreachable,
+ }
+ }
+
+ // Sort decorations by the index of the result-id in `entities`.
+ // This ensures not only that the decorations of a particular result-id
+ // are contiguous, but the subsequences also appear in the same order as in `entities`.
+
+ const SortContext = struct {
+ entities: std.AutoArrayHashMapUnmanaged(ResultId, Entity),
+ ids: []const ResultId,
+
+ pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
+ // If any index is not in the entities set, it's because it's not a
+ // deduplicatable result-id. Those should be considered largest and
+ // float to the end.
+ const entity_index_a = ctx.entities.getIndex(ctx.ids[a_index]) orelse return false;
+ const entity_index_b = ctx.entities.getIndex(ctx.ids[b_index]) orelse return true;
+
+ return entity_index_a < entity_index_b;
+ }
+ };
+
+ decorations.sort(SortContext{
+ .entities = entities.unmanaged,
+ .ids = decorations.items(.target_id),
+ });
+
+ // Now go through the decorations and add the offsets to the entities list.
+ var decoration_i: u32 = 0;
+ const target_ids = decorations.items(.target_id);
+ for (entities.keys(), entities.values()) |id, *entity| {
+ entity.first_decoration = decoration_i;
+
+ // Scan ahead to the next decoration
+ while (decoration_i < target_ids.len and target_ids[decoration_i] == id) {
+ decoration_i += 1;
+ }
+ }
+
+ return ModuleInfo{
+ .entities = entities.unmanaged,
+ .operand_is_id = operand_is_id,
+ // There may be unrelated decorations at the end, so make sure to
+ // slice those off.
+ .decorations = decorations.items(.entity)[0..decoration_i],
+ };
+ }
+
+ /// Returns the decorations of the entity stored at `index` in `entities`.
+ /// The slice runs from this entity's `first_decoration` up to the next
+ /// entity's, or to the end of `decorations` for the last entity.
+ fn entityDecorationsByIndex(self: ModuleInfo, index: usize) []const Entity {
+ const values = self.entities.values();
+ const first_decoration = values[index].first_decoration;
+ if (index == values.len - 1) {
+ return self.decorations[first_decoration..];
+ } else {
+ const next_first_decoration = values[index + 1].first_decoration;
+ return self.decorations[first_decoration..next_first_decoration];
+ }
+ }
+};
+
+/// Structural hashing and equality for the entities in a `ModuleInfo`.
+///
+/// Two entities are equal when their opcodes, literal operands, decorations
+/// and (recursively) the entities they reference are equal; the result-ids
+/// themselves are ignored. Pointer types may reference each other cyclically,
+/// so pointers seen during one traversal are tracked in `ptr_map_a` and
+/// `ptr_map_b` and compared by the order in which they were first seen.
+const EntityContext = struct {
+    /// Allocator used to grow the pointer maps.
+    a: Allocator,
+    /// Pointer types visited while hashing, or while walking the left-hand
+    /// side of a comparison.
+    ptr_map_a: std.AutoArrayHashMapUnmanaged(ResultId, void) = .{},
+    /// Pointer types visited while walking the right-hand side of a comparison.
+    ptr_map_b: std.AutoArrayHashMapUnmanaged(ResultId, void) = .{},
+    info: *const ModuleInfo,
+    binary: *const BinaryModule,
+
+    /// Frees both pointer maps and invalidates the context.
+    fn deinit(self: *EntityContext) void {
+        self.ptr_map_a.deinit(self.a);
+        self.ptr_map_b.deinit(self.a);
+
+        self.* = undefined;
+    }
+
+    /// Grows both pointer maps to the larger of their two capacities.
+    fn equalizeMapCapacity(self: *EntityContext) !void {
+        const cap = @max(self.ptr_map_a.capacity(), self.ptr_map_b.capacity());
+        try self.ptr_map_a.ensureTotalCapacity(self.a, cap);
+        try self.ptr_map_b.ensureTotalCapacity(self.a, cap);
+    }
+
+    /// Computes the structural hash of the entity identified by `id`.
+    /// Resets `ptr_map_a` before the traversal.
+    fn hash(self: *EntityContext, id: ResultId) !u64 {
+        var hasher = std.hash.Wyhash.init(0);
+        self.ptr_map_a.clearRetainingCapacity();
+        try self.hashInner(&hasher, id);
+        return hasher.final();
+    }
+
+    /// Feeds the entity `id`, its operands and its decorations into `hasher`.
+    fn hashInner(self: *EntityContext, hasher: *std.hash.Wyhash, id: ResultId) error{OutOfMemory}!void {
+        const index = self.info.entities.getIndex(id) orelse {
+            // Index unknown, the type or constant may depend on another result-id
+            // that couldn't be deduplicated and so it wasn't added to info.entities.
+            // In this case, just hash the ID itself.
+            std.hash.autoHash(hasher, id);
+            return;
+        };
+
+        const entity = self.info.entities.values()[index];
+
+        if (entity.kind == .OpTypePointer) {
+            // This may be either a pointer that is forward-referenced in the future,
+            // or a forward reference to a pointer.
+            const entry = try self.ptr_map_a.getOrPut(self.a, id);
+            if (entry.found_existing) {
+                // Pointer already seen. Hash the index instead of recursing into its children.
+                std.hash.autoHash(hasher, entry.index);
+                return;
+            }
+        }
+
+        try self.hashEntity(hasher, entity);
+
+        // Process decorations.
+        const decorations = self.info.entityDecorationsByIndex(index);
+        for (decorations) |decoration| {
+            try self.hashEntity(hasher, decoration);
+        }
+    }
+
+    /// Hashes the opcode and operands of a single entity. The operand at
+    /// `result_id_index` is skipped, id operands are hashed recursively, and
+    /// all other operands are hashed as plain words.
+    fn hashEntity(self: *EntityContext, hasher: *std.hash.Wyhash, entity: ModuleInfo.Entity) !void {
+        std.hash.autoHash(hasher, entity.kind);
+        // Process operands
+        const operands = self.binary.instructions[entity.first_operand..][0..entity.num_operands];
+        for (operands, 0..) |operand, i| {
+            if (i == entity.result_id_index) {
+                // Not relevant, skip...
+                continue;
+            } else if (self.info.operand_is_id.isSet(entity.first_operand + i)) {
+                // Operand is ID
+                try self.hashInner(hasher, @enumFromInt(operand));
+            } else {
+                // Operand is merely data
+                std.hash.autoHash(hasher, operand);
+            }
+        }
+    }
+
+    /// Returns whether the entities `a` and `b` are structurally equal.
+    /// Resets both pointer maps before the traversal.
+    fn eql(self: *EntityContext, a: ResultId, b: ResultId) !bool {
+        self.ptr_map_a.clearRetainingCapacity();
+        self.ptr_map_b.clearRetainingCapacity();
+
+        return try self.eqlInner(a, b);
+    }
+
+    /// Recursive part of `eql`: compares the two entities and their decorations.
+    fn eqlInner(self: *EntityContext, id_a: ResultId, id_b: ResultId) error{OutOfMemory}!bool {
+        const maybe_index_a = self.info.entities.getIndex(id_a);
+        const maybe_index_b = self.info.entities.getIndex(id_b);
+
+        if (maybe_index_a == null and maybe_index_b == null) {
+            // Both indices unknown. In this case the type or constant
+            // may depend on another result-id that couldn't be deduplicated
+            // (so it wasn't added to info.entities). In this case, that particular
+            // result-id should be the same one.
+            return id_a == id_b;
+        }
+
+        // Exactly one side is a known entity: they cannot be equal.
+        const index_a = maybe_index_a orelse return false;
+        const index_b = maybe_index_b orelse return false;
+
+        const entity_a = self.info.entities.values()[index_a];
+        const entity_b = self.info.entities.values()[index_b];
+
+        if (entity_a.kind == .OpTypePointer) {
+            // May be a forward reference, or should be saved as a potential
+            // forward reference in the future. Whatever the case, it should
+            // be the same for both a and b.
+            const entry_a = try self.ptr_map_a.getOrPut(self.a, id_a);
+            const entry_b = try self.ptr_map_b.getOrPut(self.a, id_b);
+
+            if (entry_a.found_existing != entry_b.found_existing) return false;
+            if (entry_a.index != entry_b.index) return false;
+
+            if (entry_a.found_existing) {
+                // No need to recurse.
+                return true;
+            }
+        }
+
+        if (!try self.eqlEntities(entity_a, entity_b)) {
+            return false;
+        }
+
+        // Compare decorations.
+        const decorations_a = self.info.entityDecorationsByIndex(index_a);
+        const decorations_b = self.info.entityDecorationsByIndex(index_b);
+        if (decorations_a.len != decorations_b.len) {
+            return false;
+        }
+
+        for (decorations_a, decorations_b) |decoration_a, decoration_b| {
+            if (!try self.eqlEntities(decoration_a, decoration_b)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /// Compares the opcode and operands of two entities. The operand at
+    /// `result_id_index` is ignored, id operands are compared recursively,
+    /// and all other operands must match word for word.
+    fn eqlEntities(self: *EntityContext, entity_a: ModuleInfo.Entity, entity_b: ModuleInfo.Entity) !bool {
+        if (entity_a.kind != entity_b.kind) {
+            return false;
+        } else if (entity_a.result_id_index != entity_b.result_id_index) {
+            // The identifying operand must sit at the same position on both
+            // sides, because only one index is used to skip it below.
+            return false;
+        }
+
+        const operands_a = self.binary.instructions[entity_a.first_operand..][0..entity_a.num_operands];
+        const operands_b = self.binary.instructions[entity_b.first_operand..][0..entity_b.num_operands];
+
+        // Note: returns false for operands that have explicit defaults in optional operands... oh well
+        if (operands_a.len != operands_b.len) {
+            return false;
+        }
+
+        for (operands_a, operands_b, 0..) |operand_a, operand_b, i| {
+            const a_is_id = self.info.operand_is_id.isSet(entity_a.first_operand + i);
+            const b_is_id = self.info.operand_is_id.isSet(entity_b.first_operand + i);
+            if (a_is_id != b_is_id) {
+                return false;
+            } else if (i == entity_a.result_id_index) {
+                // result-id for both...
+                continue;
+            } else if (a_is_id) {
+                // Both are IDs, so recurse.
+                if (!try self.eqlInner(@enumFromInt(operand_a), @enumFromInt(operand_b))) {
+                    return false;
+                }
+            } else if (operand_a != operand_b) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+};
+
+/// Hash-map context backed by an `EntityContext`.
+///
+/// `EntityContext` may allocate while hashing or comparing, which a hash-map
+/// context is not allowed to report. This adapter therefore assumes that the
+/// context's maps already have the maximum capacity they will ever need,
+/// which is arranged by hashing every key once beforehand.
+const EntityHashContext = struct {
+    entity_context: *EntityContext,
+
+    /// Structural hash of `key`. Allocation failure is assumed impossible.
+    pub fn hash(self: EntityHashContext, key: ResultId) u64 {
+        const ctx = self.entity_context;
+        return ctx.hash(key) catch |err| switch (err) {
+            error.OutOfMemory => unreachable,
+        };
+    }
+
+    /// Structural equality of `a` and `b`. Allocation failure is assumed impossible.
+    pub fn eql(self: EntityHashContext, a: ResultId, b: ResultId) bool {
+        const ctx = self.entity_context;
+        return ctx.eql(a, b) catch |err| switch (err) {
+            error.OutOfMemory => unreachable,
+        };
+    }
+};
+
+/// Deduplicates structurally identical types and constants (together with
+/// their decorations and debug names) in `binary`.
+///
+/// The pass collects module info, finds the entities that duplicate an
+/// earlier one, and re-emits the module with the duplicate definitions
+/// dropped and every reference rewritten to the surviving id. Existing
+/// `OpTypeForwardPointer` instructions are dropped and new ones are emitted
+/// where a pointer type is referenced before its definition.
+///
+/// Scratch memory lives in an arena that is freed on return. The new
+/// instruction stream is allocated with `parser.a` and stored in
+/// `binary.instructions`; `binary.sections.functions` is updated to match.
+pub fn run(parser: *BinaryModule.Parser, binary: *BinaryModule) !void {
+    var arena = std.heap.ArenaAllocator.init(parser.a);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    const info = try ModuleInfo.parse(a, parser, binary.*);
+
+    // Hash all keys once so that the maps can be allocated the right size.
+    var ctx = EntityContext{
+        .a = a,
+        .info = &info,
+        .binary = binary,
+    };
+    for (info.entities.keys()) |id| {
+        _ = try ctx.hash(id);
+    }
+
+    // hash only uses ptr_map_a, so allocate ptr_map_b too
+    try ctx.equalizeMapCapacity();
+
+    // Figure out which entities can be deduplicated.
+    var map = std.HashMap(ResultId, void, EntityHashContext, 80).initContext(a, .{
+        .entity_context = &ctx,
+    });
+    // Maps each duplicate id to the id of the first equal entity.
+    var replace = std.AutoArrayHashMap(ResultId, ResultId).init(a);
+    for (info.entities.keys()) |id| {
+        const entry = try map.getOrPut(id);
+        if (entry.found_existing) {
+            try replace.putNoClobber(id, entry.key_ptr.*);
+        }
+    }
+
+    // Now process the module, and replace instructions where needed.
+    var section = Section{};
+    var it = binary.iterateInstructions();
+    var new_functions_section: ?usize = null;
+    var new_operands = std.ArrayList(u32).init(a);
+    var emitted_ptrs = std.AutoHashMap(ResultId, void).init(a);
+    while (it.next()) |inst| {
+        // Result-id can only be the first or second operand
+        const inst_spec = parser.getInstSpec(inst.opcode).?;
+
+        const maybe_result_id_offset: ?u16 = for (0..2) |i| {
+            if (inst_spec.operands.len > i and inst_spec.operands[i].kind == .IdResult) {
+                break @intCast(i);
+            }
+        } else null;
+
+        // Drop the definition of anything that is replaced by an earlier entity.
+        if (maybe_result_id_offset) |offset| {
+            const result_id: ResultId = @enumFromInt(inst.operands[offset]);
+            if (replace.contains(result_id)) continue;
+        }
+
+        switch (inst.opcode) {
+            .OpFunction => if (new_functions_section == null) {
+                new_functions_section = section.instructions.items.len;
+            },
+            .OpTypeForwardPointer => continue, // We re-emit these where needed
+            else => {},
+        }
+
+        switch (inst.opcode.class()) {
+            .Annotation, .Debug => {
+                // For decoration-style instructions, only emit them
+                // if the target is not removed.
+                // Not every instruction in these classes starts with a target
+                // id: some have no operands at all (OpNoLine) and some start
+                // with a literal (OpSource). Only treat the first operand as
+                // a target when it exists and really is an id.
+                if (inst.operands.len > 0 and info.operand_is_id.isSet(inst.offset + 1)) {
+                    const target: ResultId = @enumFromInt(inst.operands[0]);
+                    if (replace.contains(target)) continue;
+                }
+            },
+            else => {},
+        }
+
+        // Re-emit the instruction, but replace all the IDs.
+
+        new_operands.clearRetainingCapacity();
+        try new_operands.appendSlice(inst.operands);
+
+        for (new_operands.items, 0..) |*operand, i| {
+            const is_id = info.operand_is_id.isSet(inst.offset + 1 + i);
+            if (!is_id) continue;
+
+            if (replace.get(@enumFromInt(operand.*))) |new_id| {
+                operand.* = @intFromEnum(new_id);
+            }
+
+            // For every id that is used (not defined) here: if it names a
+            // pointer type that has not been emitted yet, forward-declare it.
+            if (maybe_result_id_offset == null or maybe_result_id_offset.? != i) {
+                const id: ResultId = @enumFromInt(operand.*);
+                const index = info.entities.getIndex(id) orelse continue;
+                const entity = info.entities.values()[index];
+                if (entity.kind == .OpTypePointer and !emitted_ptrs.contains(id)) {
+                    // Grab the pointer's storage class from its operands in the original
+                    // module.
+                    const storage_class: spec.StorageClass = @enumFromInt(binary.instructions[entity.first_operand + 1]);
+                    try section.emit(a, .OpTypeForwardPointer, .{
+                        .pointer_type = id,
+                        .storage_class = storage_class,
+                    });
+                    try emitted_ptrs.put(id, {});
+                }
+            }
+        }
+
+        if (inst.opcode == .OpTypePointer) {
+            // The pointer is defined now, so later uses need no forward declaration.
+            const result_id: ResultId = @enumFromInt(new_operands.items[maybe_result_id_offset.?]);
+            try emitted_ptrs.put(result_id, {});
+        }
+
+        try section.emitRawInstruction(a, inst.opcode, new_operands.items);
+    }
+
+    // Removed ids must not linger in the module's side tables.
+    for (replace.keys()) |key| {
+        _ = binary.ext_inst_map.remove(key);
+        _ = binary.arith_type_width.remove(key);
+    }
+
+    binary.instructions = try parser.a.dupe(Word, section.toWords());
+    binary.sections.functions = new_functions_section orelse binary.instructions.len;
+}