src/link/SpirV/BinaryModule.zig


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464

const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.spirv_parse);

const spec = @import("../../codegen/spirv/spec.zig");
const Opcode = spec.Opcode;
const Word = spec.Word;
const InstructionSet = spec.InstructionSet;
const ResultId = spec.Id;

const BinaryModule = @This();

pub const header_words = 5;

/// The module SPIR-V version.
version: spec.Version,

/// The generator magic number.
generator_magic: u32,

/// The result-id bound of this SPIR-V module.
id_bound: u32,

/// The instructions of this module. This does not contain the header.
instructions: []const Word,

/// Maps OpExtInstImport result-ids to their InstructionSet.
ext_inst_map: std.AutoHashMapUnmanaged(ResultId, InstructionSet),

/// This map contains the width of arithmetic types (OpTypeInt and
/// OpTypeFloat). We need this information to correctly parse the operands
/// of Op(Spec)Constant and OpSwitch.
arith_type_width: std.AutoHashMapUnmanaged(ResultId, u16),

/// The starting offsets of some sections
sections: struct {
    functions: usize,
},

/// Releases the two lookup tables owned by this module using allocator `a`,
/// then invalidates `self`. The `instructions` slice is not freed here.
pub fn deinit(self: *BinaryModule, a: Allocator) void {
    // The tables are independent of each other, so release order is irrelevant.
    self.arith_type_width.deinit(a);
    self.ext_inst_map.deinit(a);
    self.* = undefined;
}

/// Returns an iterator positioned at the first word of `self.instructions`.
pub fn iterateInstructions(self: BinaryModule) Instruction.Iterator {
    return self.iterateInstructionsFrom(0);
}

/// Returns an iterator over `self.instructions` that starts at word `offset`.
/// `offset` is relative to the instruction stream (the header is not part
/// of `self.instructions`).
pub fn iterateInstructionsFrom(self: BinaryModule, offset: usize) Instruction.Iterator {
    const Iterator = Instruction.Iterator;
    const stream = self.instructions;
    return Iterator.init(stream, offset);
}

/// Decodes the single instruction that starts at word `offset` of
/// `self.instructions`. The result is unwrapped with `.?`, so `offset`
/// must lie inside the instruction stream.
pub fn instructionAt(self: BinaryModule, offset: usize) Instruction {
    var cursor = Instruction.Iterator.init(self.instructions, offset);
    const decoded = cursor.next();
    return decoded.?;
}

/// Serializes the module into a complete SPIR-V word stream: a header of
/// `header_words` words followed by a copy of `self.instructions`.
///
/// The returned slice is allocated with `a`; the caller owns it and must
/// free it with the same allocator. Fails only if the allocation fails.
pub fn finalize(self: BinaryModule, a: Allocator) ![]Word {
    const result = try a.alloc(Word, header_words + self.instructions.len);
    // Nothing below can fail, so `result` needs no errdefer.

    result[0] = spec.magic_number;
    result[1] = @bitCast(self.version);
    result[2] = self.generator_magic;
    result[3] = self.id_bound;
    result[4] = 0; // Schema

    @memcpy(result[header_words..], self.instructions);
    return result;
}

/// Errors that can be raised when the module is not correct.
/// Note that the parser is far from a full SPIR-V validator: it only
/// reports errors that critically prevent further analysis of the module.
pub const ParseError = error{
    /// Raised when the module doesn't start with the SPIR-V magic number.
    /// This usually means that the module isn't actually SPIR-V.
    InvalidMagic,
    /// Raised when the module has an invalid "physical" format,
    /// for example when the header is incomplete or an instruction
    /// has an illegal format.
    InvalidPhysicalFormat,
    /// OpExtInstImport was used with an unknown extension string.
    InvalidExtInstImport,
    /// The module had an instruction with an invalid (unknown) opcode.
    InvalidOpcode,
    /// An instruction's operands did not conform to the SPIR-V specification
    /// for that instruction.
    InvalidOperands,
    /// A result-id was declared more than once.
    DuplicateId,
    /// Some ID did not resolve.
    InvalidId,
    /// This opcode or instruction is not supported yet.
    UnsupportedOperation,
    /// The parser ran out of memory.
    OutOfMemory,
};

/// A single decoded instruction: its opcode, its position in the
/// stream, and a view of its raw operand words.
pub const Instruction = struct {
    /// The opcode for this instruction.
    opcode: Opcode,
    /// The instruction's index (how many instructions the iterator
    /// yielded before this one).
    index: usize,
    /// The instruction's word offset in the iterated word slice.
    offset: usize,
    /// The raw (unparsed) operands for this instruction, i.e. every
    /// word of the instruction except the leading opcode/length word.
    operands: []const Word,

    /// Walks a slice of instruction words one instruction at a time.
    pub const Iterator = struct {
        words: []const Word,
        index: usize = 0,
        offset: usize = 0,

        /// Creates an iterator over `words` whose first instruction is
        /// read at word `start_offset`; the instruction index starts at 0.
        pub fn init(words: []const Word, start_offset: usize) Iterator {
            return .{ .words = words, .offset = start_offset };
        }

        /// Returns the instruction at the current offset and advances past
        /// it, or returns null once the offset has reached the end of
        /// `words`. Asserts that the encoded instruction length is non-zero.
        pub fn next(self: *Iterator) ?Instruction {
            const start = self.offset;
            if (start >= self.words.len) return null;

            // First word: length (in words) in the high half, opcode in the low half.
            const header = self.words[start];
            const word_count = header >> 16;
            assert(word_count != 0);

            const inst: Instruction = .{
                .opcode = @enumFromInt(header & 0xFFFF),
                .index = self.index,
                .offset = start,
                .operands = self.words[start..][1..word_count],
            };

            self.index += 1;
            self.offset += word_count;
            return inst;
        }
    };
};

/// This parser contains information (acceleration tables)
/// that can be persisted across different modules. This is
/// used to initialize the module, and is also used when
/// further analyzing it.
pub const Parser = struct {
    /// The allocator used to allocate this parser's structures,
    /// and also the structures of any parsed module.
    a: Allocator,

    /// Maps (instruction set, opcode) => instruction index (for instruction set)
    opcode_table: std.AutoHashMapUnmanaged(u32, u16) = .empty,

    /// Creates a parser that allocates with `a` and fills `opcode_table`
    /// with one entry per (instruction set, opcode) pair, mapping it to
    /// the instruction's position in that set's instruction list.
    /// On failure the partially built table is released.
    pub fn init(a: Allocator) !Parser {
        var parser: Parser = .{ .a = a };
        errdefer parser.deinit();

        inline for (std.meta.tags(InstructionSet)) |set| {
            const set_instructions = set.instructions();
            try parser.opcode_table.ensureUnusedCapacity(a, @intCast(set_instructions.len));
            for (set_instructions, 0..) |inst, position| {
                const key = mapSetAndOpcode(set, @intCast(inst.opcode));
                const slot = parser.opcode_table.getOrPutAssumeCapacity(key);
                // Some instructions alias another one. Which alias wins does not
                // really matter since they (should) all have the same operands;
                // keep the first, which is usually the core, KHR or EXT variant.
                if (slot.found_existing) continue;
                slot.value_ptr.* = @intCast(position);
            }
        }

        return parser;
    }

    /// Frees the opcode lookup table using the allocator stored in `self.a`.
    pub fn deinit(self: *Parser) void {
        const gpa = self.a;
        self.opcode_table.deinit(gpa);
    }

    /// Packs an instruction set and an opcode into one `opcode_table` key:
    /// the set's tag value shifted left by 16 bits, OR-ed with the opcode
    /// in the low 16 bits.
    fn mapSetAndOpcode(set: InstructionSet, opcode: u16) u32 {
        const set_bits: u32 = @intFromEnum(set);
        const opcode_bits: u32 = opcode;
        return (set_bits << 16) | opcode_bits;
    }

    /// Looks up the core instruction set's description of `opcode`.
    /// Returns null when the opcode is not present in the core set's table.
    pub fn getInstSpec(self: Parser, opcode: Opcode) ?spec.Instruction {
        const key = mapSetAndOpcode(.core, @intFromEnum(opcode));
        if (self.opcode_table.get(key)) |index| {
            return InstructionSet.core.instructions()[index];
        }
        return null;
    }

    /// Parses the raw words of a SPIR-V module (header included) into a
    /// `BinaryModule`.
    ///
    /// The returned module's `instructions` slice borrows from `module`, so
    /// `module` must outlive it. The lookup tables are allocated with
    /// `self.a`; release them with `BinaryModule.deinit`. On error, everything
    /// allocated here is released before returning.
    ///
    /// Only the basic structure is verified: the header, the length of every
    /// instruction, that every opcode is known to the core set, and the
    /// operands of the few instructions inspected here.
    pub fn parse(self: *Parser, module: []const u32) ParseError!BinaryModule {
        // Guard the `module[0]` read: an empty input has no magic word to inspect
        // and is reported as a truncated header below.
        if (module.len > 0 and module[0] != spec.magic_number) {
            return error.InvalidMagic;
        }
        if (module.len < header_words) {
            log.err("module only has {}/{} header words", .{ module.len, header_words });
            return error.InvalidPhysicalFormat;
        }

        var binary = BinaryModule{
            .version = @bitCast(module[1]),
            .generator_magic = module[2],
            .id_bound = module[3],
            .instructions = module[header_words..],
            .ext_inst_map = .empty,
            .arith_type_width = .empty,
            .sections = undefined,
        };
        // Every error return below would otherwise leak the two maps.
        errdefer binary.deinit(self.a);

        var maybe_function_section: ?usize = null;

        // First pass through the module to verify basic structure and
        // to gather some initial stuff for more detailed analysis.
        // We want to check some stuff that Instruction.Iterator is no good for,
        // so just iterate manually.
        var offset: usize = 0;
        while (offset < binary.instructions.len) {
            const len = binary.instructions[offset] >> 16;
            if (len == 0 or len + offset > binary.instructions.len) {
                log.err("invalid instruction format: len={}, end={}, module len={}", .{ len, len + offset, binary.instructions.len });
                return error.InvalidPhysicalFormat;
            }
            defer offset += len;

            // We can't really efficiently use non-exhaustive enums here, because we would
            // need to manually write out all valid cases. Since we have this map anyway, just
            // use that.
            const opcode: Opcode = @enumFromInt(@as(u16, @truncate(binary.instructions[offset])));
            const inst_spec = self.getInstSpec(opcode) orelse {
                log.err("invalid opcode for core set: {}", .{@intFromEnum(opcode)});
                return error.InvalidOpcode;
            };

            const operands = binary.instructions[offset..][1..len];
            switch (opcode) {
                .OpExtInstImport => {
                    // operands[0] is the result-id; the set name follows it.
                    if (operands.len == 0) return error.InvalidOperands;
                    const set_name = std.mem.sliceTo(std.mem.sliceAsBytes(operands[1..]), 0);
                    const set = std.meta.stringToEnum(InstructionSet, set_name) orelse {
                        log.err("invalid instruction set '{s}'", .{set_name});
                        return error.InvalidExtInstImport;
                    };
                    if (set == .core) return error.InvalidExtInstImport;
                    try binary.ext_inst_map.put(self.a, @enumFromInt(operands[0]), set);
                },
                .OpTypeInt, .OpTypeFloat => {
                    // operands[0] is the result-id, operands[1] the bit width.
                    if (operands.len < 2) return error.InvalidOperands;
                    const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[0]));
                    if (entry.found_existing) return error.DuplicateId;
                    entry.value_ptr.* = std.math.cast(u16, operands[1]) orelse return error.InvalidOperands;
                },
                .OpFunction => if (maybe_function_section == null) {
                    maybe_function_section = offset;
                },
                else => {},
            }

            // OpSwitch takes a value as argument, not an OpType... hence we need to populate arith_type_width
            // with ALL operations that return an int or float.
            const spec_operands = inst_spec.operands;
            if (spec_operands.len >= 2 and
                spec_operands[0].kind == .id_result_type and
                spec_operands[1].kind == .id_result)
            {
                if (operands.len < 2) return error.InvalidOperands;
                if (binary.arith_type_width.get(@enumFromInt(operands[0]))) |width| {
                    const entry = try binary.arith_type_width.getOrPut(self.a, @enumFromInt(operands[1]));
                    if (entry.found_existing) return error.DuplicateId;
                    entry.value_ptr.* = width;
                }
            }
        }

        // Without any OpFunction the function section starts at the end of the stream.
        binary.sections = .{
            .functions = maybe_function_section orelse binary.instructions.len,
        };

        return binary;
    }

    /// Parse offsets in the instruction that contain result-ids.
    /// Returned offsets are relative to inst.operands.
    /// Results are appended to the caller's array list to amortize allocations.
    ///
    /// `inst.opcode` must be present in the core opcode table (the lookup is
    /// unwrapped with `.?`). Returns `error.InvalidPhysicalFormat` when the
    /// operands do not match the instruction's description, and
    /// `error.InvalidId` when an OpExtInst refers to an instruction set that
    /// is not in `binary.ext_inst_map`.
    pub fn parseInstructionResultIds(
        self: *Parser,
        binary: BinaryModule,
        inst: Instruction,
        offsets: *std.array_list.Managed(u16),
    ) !void {
        const index = self.opcode_table.get(mapSetAndOpcode(.core, @intFromEnum(inst.opcode))).?;
        const operands = InstructionSet.core.instructions()[index].operands;

        var offset: usize = 0;
        switch (inst.opcode) {
            .OpSpecConstantOp => {
                assert(operands[0].kind == .id_result_type);
                assert(operands[1].kind == .id_result);
                offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);

                if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
                const spec_opcode = std.math.cast(u16, inst.operands[offset]) orelse return error.InvalidPhysicalFormat;
                const spec_index = self.opcode_table.get(mapSetAndOpcode(.core, spec_opcode)) orelse
                    return error.InvalidPhysicalFormat;
                const spec_operands = InstructionSet.core.instructions()[spec_index].operands;
                // The wrapped opcode is read from the module, so it may name an
                // instruction without a leading result-type/result-id pair (or with
                // no operands at all). Reject that instead of asserting on it.
                if (spec_operands.len < 2 or
                    spec_operands[0].kind != .id_result_type or
                    spec_operands[1].kind != .id_result)
                {
                    return error.InvalidPhysicalFormat;
                }
                offset = try self.parseOperandsResultIds(binary, inst, spec_operands[2..], offset + 1, offsets);
            },
            .OpExtInst => {
                assert(operands[0].kind == .id_result_type);
                assert(operands[1].kind == .id_result);
                offset = try self.parseOperandsResultIds(binary, inst, operands[0..2], offset, offsets);

                // Two more words are needed: the instruction set id and the
                // extended opcode.
                if (offset + 1 >= inst.operands.len) return error.InvalidPhysicalFormat;
                const set_id: ResultId = @enumFromInt(inst.operands[offset]);
                try offsets.append(@intCast(offset));
                const set = binary.ext_inst_map.get(set_id) orelse {
                    log.err("invalid instruction set {}", .{@intFromEnum(set_id)});
                    return error.InvalidId;
                };
                const ext_opcode = std.math.cast(u16, inst.operands[offset + 1]) orelse return error.InvalidPhysicalFormat;
                const ext_index = self.opcode_table.get(mapSetAndOpcode(set, ext_opcode)) orelse
                    return error.InvalidPhysicalFormat;
                const ext_operands = set.instructions()[ext_index].operands;
                offset = try self.parseOperandsResultIds(binary, inst, ext_operands, offset + 2, offsets);
            },
            else => {
                offset = try self.parseOperandsResultIds(binary, inst, operands, offset, offsets);
            },
        }

        // Every operand word must have been consumed, no more and no less.
        if (offset != inst.operands.len) return error.InvalidPhysicalFormat;
    }

    /// Processes each operand description in `operands` in order, starting
    /// at word `start_offset` of `inst.operands`, and returns the offset
    /// just past the last word consumed. Result-id offsets are appended
    /// to `offsets`.
    fn parseOperandsResultIds(
        self: *Parser,
        binary: BinaryModule,
        inst: Instruction,
        operands: []const spec.Operand,
        start_offset: usize,
        offsets: *std.array_list.Managed(u16),
    ) !usize {
        var cursor = start_offset;
        var i: usize = 0;
        while (i < operands.len) : (i += 1) {
            cursor = try self.parseOperandResultIds(binary, inst, operands[i], cursor, offsets);
        }
        return cursor;
    }

    /// Applies the quantifier of `operand` at word `start_offset` of
    /// `inst.operands` and returns the offset after the consumed words:
    /// a required operand is parsed exactly once, an optional one only if
    /// words remain, and a variadic one repeatedly until no words remain.
    fn parseOperandResultIds(
        self: *Parser,
        binary: BinaryModule,
        inst: Instruction,
        operand: spec.Operand,
        start_offset: usize,
        offsets: *std.array_list.Managed(u16),
    ) !usize {
        const word_count = inst.operands.len;
        switch (operand.quantifier) {
            .required => {
                return self.parseOperandKindResultIds(binary, inst, operand.kind, start_offset, offsets);
            },
            .optional => {
                if (start_offset >= word_count) return start_offset;
                return self.parseOperandKindResultIds(binary, inst, operand.kind, start_offset, offsets);
            },
            .variadic => {
                var cursor = start_offset;
                while (cursor < word_count) {
                    cursor = try self.parseOperandKindResultIds(binary, inst, operand.kind, cursor, offsets);
                }
                return cursor;
            },
        }
    }

    /// Consumes one operand of the given `kind` starting at word
    /// `start_offset` of `inst.operands`, appends the offset of every
    /// result-id it contains to `offsets`, and returns the offset just
    /// past the operand.
    ///
    /// Enum operands recurse into the parameters of the enumerants that are
    /// present. Returns `error.InvalidPhysicalFormat` when the operand would
    /// start past the end of the instruction, when a literal has a bit width
    /// this parser cannot size, or when an operand kind appears where it
    /// cannot be parsed. Returns `error.InvalidId` when the width of a
    /// context dependent literal's type is unknown.
    fn parseOperandKindResultIds(
        self: *Parser,
        binary: BinaryModule,
        inst: Instruction,
        kind: spec.OperandKind,
        start_offset: usize,
        offsets: *std.array_list.Managed(u16),
    ) !usize {
        var offset = start_offset;
        if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;

        switch (kind.category()) {
            .bit_enum => {
                // Every set bit may bring its own parameters, in enumerant order.
                const mask = inst.operands[offset];
                offset += 1;
                for (kind.enumerants()) |enumerant| {
                    if ((mask & enumerant.value) != 0) {
                        for (enumerant.parameters) |param_kind| {
                            offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
                        }
                    }
                }
            },
            .value_enum => {
                // Only the first matching enumerant contributes parameters.
                const value = inst.operands[offset];
                offset += 1;
                for (kind.enumerants()) |enumerant| {
                    if (value == enumerant.value) {
                        for (enumerant.parameters) |param_kind| {
                            offset = try self.parseOperandKindResultIds(binary, inst, param_kind, offset, offsets);
                        }
                        break;
                    }
                }
            },
            .id => {
                try offsets.append(@intCast(offset));
                offset += 1;
            },
            else => switch (kind) {
                .literal_integer, .literal_float => offset += 1,
                .literal_string => while (true) {
                    if (offset >= inst.operands.len) return error.InvalidPhysicalFormat;
                    const word = inst.operands[offset];
                    offset += 1;

                    // The string ends with the first word containing a zero byte.
                    if (word & 0xFF000000 == 0 or
                        word & 0x00FF0000 == 0 or
                        word & 0x0000FF00 == 0 or
                        word & 0x000000FF == 0)
                    {
                        break;
                    }
                },
                .literal_context_dependent_number => {
                    // OpConstant and OpSpecConstant carry this operand; OpSpecConstantOp
                    // is kept in the list for backward compatibility.
                    assert(inst.opcode == .OpConstant or
                        inst.opcode == .OpSpecConstant or
                        inst.opcode == .OpSpecConstantOp);
                    const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
                        log.err("invalid LiteralContextDependentNumber type {}", .{inst.operands[0]});
                        return error.InvalidId;
                    };
                    // The width comes from the module, so an unexpected value must be
                    // reported rather than treated as unreachable.
                    offset += switch (bit_width) {
                        1...32 => 1,
                        33...64 => 2,
                        else => {
                            log.err("unsupported literal bit width {}", .{bit_width});
                            return error.InvalidPhysicalFormat;
                        },
                    };
                },
                // These are consumed by parseInstructionResultIds for OpExtInst and
                // OpSpecConstantOp. They can only show up here when a malformed
                // OpSpecConstantOp wraps such an instruction.
                .literal_ext_inst_integer => return error.InvalidPhysicalFormat,
                .literal_spec_constant_op_integer => return error.InvalidPhysicalFormat,
                .pair_literal_integer_id_ref => { // Switch case
                    assert(inst.opcode == .OpSwitch);
                    const bit_width = binary.arith_type_width.get(@enumFromInt(inst.operands[0])) orelse {
                        log.err("invalid OpSwitch type {}", .{inst.operands[0]});
                        return error.InvalidId;
                    };
                    // Skip the case literal, whose size depends on the selector's width.
                    offset += switch (bit_width) {
                        1...32 => 1,
                        33...64 => 2,
                        else => {
                            log.err("unsupported literal bit width {}", .{bit_width});
                            return error.InvalidPhysicalFormat;
                        },
                    };
                    try offsets.append(@intCast(offset));
                    offset += 1;
                },
                .pair_id_ref_literal_integer => {
                    try offsets.append(@intCast(offset));
                    offset += 2;
                },
                .pair_id_ref_id_ref => {
                    try offsets.append(@intCast(offset));
                    try offsets.append(@intCast(offset + 1));
                    offset += 2;
                },
                else => unreachable,
            },
        }
        return offset;
    }
};