//! Writer for the LLVM bitcode bitstream container: a little-endian stream of
//! 32-bit words into which fixed-width fields, VBR fields, 6-bit characters,
//! blocks, abbreviation definitions and records are packed bit by bit.

const std = @import("std");

/// One operand of an abbreviation definition.
///
/// For the non-literal operands the trailing number is the encoding id that
/// `defineAbbrev` writes for that operand; array operands are emitted as two
/// operands (the array encoding followed by the element encoding).
/// The `*_runtime` variants carry a type whose bit width is looked up at
/// runtime in `BitcodeWriter.widths` instead of being a comptime constant.
pub const AbbrevOp = union(enum) {
    literal: u32, // 0
    fixed: u16, // 1
    fixed_runtime: type, // 1
    vbr: u16, // 2
    char6: void, // 4
    blob: void, // 5
    array_fixed: u16, // 3, 1
    array_fixed_runtime: type, // 3, 1
    array_vbr: u16, // 3, 2
    array_char6: void, // 3, 4
};

/// The only failure mode of the writer is running out of memory while growing
/// the word buffer.
pub const Error = error{OutOfMemory};

/// Returns a bitcode writer type.
///
/// `types` lists the types usable with `AbbrevOp.fixed_runtime` and
/// `AbbrevOp.array_fixed_runtime`; the bit width of each is supplied to `init`
/// in the same order.
pub fn BitcodeWriter(comptime types: []const type) type {
    return struct {
        const BcWriter = @This();

        /// Completed 32-bit words.
        buffer: std.ArrayList(u32),
        /// Partially filled word; the low `bit_count` bits are valid.
        bit_buffer: u32 = 0,
        /// Number of valid bits in `bit_buffer` (0 means empty).
        bit_count: u5 = 0,

        /// Runtime bit widths, parallel to `types`.
        widths: [types.len]u16,

        /// Returns the runtime bit width registered for `Type`.
        /// `Type` must be one of `types`; the lookup happens at comptime.
        pub fn getTypeWidth(self: BcWriter, comptime Type: type) u16 {
            return self.widths[comptime std.mem.indexOfScalar(type, types, Type).?];
        }

        /// Creates an empty writer. `widths[i]` is the bit width used for `types[i]`.
        pub fn init(allocator: std.mem.Allocator, widths: [types.len]u16) BcWriter {
            return .{
                .buffer = std.ArrayList(u32).init(allocator),
                .widths = widths,
            };
        }

        /// Frees the word buffer.
        pub fn deinit(self: BcWriter) void {
            self.buffer.deinit();
        }

        /// Returns the words written so far. The stream must be 32-bit aligned
        /// (no pending bits); the slice is owned by the writer.
        pub fn toSlice(self: BcWriter) []const u32 {
            std.debug.assert(self.bit_count == 0);
            return self.buffer.items;
        }

        /// Returns the number of complete words written so far. The stream
        /// must be 32-bit aligned (no pending bits).
        pub fn length(self: BcWriter) usize {
            std.debug.assert(self.bit_count == 0);
            return self.buffer.items.len;
        }

        /// Appends the low `bits` bits of `value` to the stream, least
        /// significant bit first. Writing zero bits is a no-op.
        ///
        /// Bits of `value` above `bits` are discarded so that an over-wide
        /// value cannot leak into the fields written after it.
        pub fn writeBits(self: *BcWriter, value: anytype, bits: u16) Error!void {
            if (bits == 0) return;

            var in_buffer = bufValue(value, 32);
            var in_bits = bits;

            // Drop everything above the requested width. Without this, stray
            // high bits would be stored in `bit_buffer` and OR-ed into the
            // next field.
            const InBuffer = @TypeOf(in_buffer);
            if (bits < @bitSizeOf(InBuffer)) {
                in_buffer &= (@as(InBuffer, 1) << @intCast(bits)) - 1;
            }

            // Store input bits in buffer if they fit otherwise store as many as possible and flush
            if (self.bit_count > 0) {
                // `32 - bit_count`, written so the arithmetic stays within u5.
                const bits_remaining = 31 - self.bit_count + 1;
                const n: u5 = @intCast(@min(bits_remaining, in_bits));
                const v = @as(u32, @truncate(in_buffer)) << self.bit_count;
                self.bit_buffer |= v;
                in_buffer >>= n;

                // Wraps to 0 exactly when the pending word became full.
                self.bit_count +%= n;
                in_bits -= n;

                if (self.bit_count != 0) return;
                try self.buffer.append(self.bit_buffer);
                self.bit_buffer = 0;
            }

            // Write 32-bit chunks of input bits
            while (in_bits >= 32) {
                try self.buffer.append(@truncate(in_buffer));

                // Shift by 32 in two steps: a single shift by 32 is not
                // representable when the buffer type is exactly u32.
                in_buffer >>= 31;
                in_buffer >>= 1;
                in_bits -= 32;
            }

            // Store remaining input bits in buffer
            if (in_bits > 0) {
                self.bit_count = @intCast(in_bits);
                self.bit_buffer = @truncate(in_buffer);
            }
        }

        /// Writes `value` as a variable bit rate field made of chunks of
        /// `vbr_bits` bits: each chunk holds `vbr_bits - 1` payload bits and
        /// its top bit is set when another chunk follows.
        pub fn writeVBR(self: *BcWriter, value: anytype, comptime vbr_bits: usize) Error!void {
            comptime {
                std.debug.assert(vbr_bits > 1);
                if (@bitSizeOf(@TypeOf(value)) > 64) @compileError("Unsupported VBR block type: " ++ @typeName(@TypeOf(value)));
            }

            var in_buffer = bufValue(value, vbr_bits);

            const continue_bit = @as(@TypeOf(in_buffer), 1) << @intCast(vbr_bits - 1);
            const mask = continue_bit - 1;

            // If input is larger than one VBR block can store
            // then store vbr_bits - 1 bits and a continue bit
            while (in_buffer > mask) {
                try self.writeBits(in_buffer & mask | continue_bit, vbr_bits);
                in_buffer >>= @intCast(vbr_bits - 1);
            }

            // Store remaining bits
            try self.writeBits(in_buffer, vbr_bits);
        }

        /// Returns the number of bits `writeVBR(value, vbr_bits)` would emit,
        /// without writing anything.
        pub fn bitsVBR(_: *const BcWriter, value: anytype, comptime vbr_bits: usize) u16 {
            comptime {
                std.debug.assert(vbr_bits > 1);
                if (@bitSizeOf(@TypeOf(value)) > 64) @compileError("Unsupported VBR block type: " ++ @typeName(@TypeOf(value)));
            }

            var bits: u16 = 0;

            var in_buffer = bufValue(value, vbr_bits);

            const continue_bit = @as(@TypeOf(in_buffer), 1) << @intCast(vbr_bits - 1);
            const mask = continue_bit - 1;

            // One chunk for every group of vbr_bits - 1 payload bits that
            // does not fit in the final chunk.
            while (in_buffer > mask) {
                bits += @intCast(vbr_bits);
                in_buffer >>= @intCast(vbr_bits - 1);
            }

            // The final chunk is always written.
            bits += @intCast(vbr_bits);
            return bits;
        }

        /// Writes `c` as a 6-bit character. Panics if `c` is outside
        /// `[a-zA-Z0-9._]`.
        pub fn write6BitChar(self: *BcWriter, c: u8) Error!void {
            try self.writeBits(charTo6Bit(c), 6);
        }

        /// Pads the stream with zero bits up to the next 32-bit boundary.
        /// Does nothing when the stream is already aligned.
        pub fn alignTo32(self: *BcWriter) Error!void {
            if (self.bit_count == 0) return;

            try self.buffer.append(self.bit_buffer);
            self.bit_buffer = 0;
            self.bit_count = 0;
        }

        /// Opens a top-level block; the enclosing abbreviation id width is 2 bits.
        pub fn enterTopBlock(self: *BcWriter, comptime SubBlock: type) Error!BlockWriter(SubBlock) {
            return BlockWriter(SubBlock).init(self, 2);
        }

        /// Returns the writer for one block kind.
        ///
        /// `Block` must declare `id` and may declare `abbrevs`, a list of
        /// abbreviation types. Each abbreviation type declares `ops` (a list
        /// of `AbbrevOp`) and has one field per non-literal op, in order.
        fn BlockWriter(comptime Block: type) type {
            return struct {
                const Self = @This();

                /// Abbreviations predefined for this block; empty when `Block`
                /// declares none. Every use of the list goes through this
                /// constant so that blocks without `abbrevs` compile.
                const block_abbrevs = if (@hasDecl(Block, "abbrevs")) Block.abbrevs else [0]type{};

                // The minimum abbrev id length based on the number of abbrevs present in the block
                // (ids 0 to 3 are reserved, user abbreviations start at 4).
                pub const abbrev_len = std.math.log2_int_ceil(
                    u6,
                    4 + block_abbrevs.len,
                );

                /// Word index of the block length placeholder.
                start: usize,
                bitcode: *BcWriter,

                /// Emits the block header (enter-sub-block id 1 in the
                /// parent's id width, block id, new id width, alignment,
                /// length placeholder) and defines all of the block's
                /// abbreviations.
                pub fn init(bitcode: *BcWriter, comptime parent_abbrev_len: u6) Error!Self {
                    try bitcode.writeBits(1, parent_abbrev_len);
                    try bitcode.writeVBR(Block.id, 8);
                    try bitcode.writeVBR(abbrev_len, 4);
                    try bitcode.alignTo32();

                    // We store the index of the block size and store a dummy value as the number of words in the block
                    const start = bitcode.length();
                    try bitcode.writeBits(0, 32);

                    // Predefine all block abbrevs
                    inline for (block_abbrevs) |Abbrev| {
                        try defineAbbrev(bitcode, &Abbrev.ops);
                    }

                    return .{
                        .start = start,
                        .bitcode = bitcode,
                    };
                }

                /// Opens a block nested in this one.
                pub fn enterSubBlock(self: Self, comptime SubBlock: type) Error!BlockWriter(SubBlock) {
                    return BlockWriter(SubBlock).init(self.bitcode, abbrev_len);
                }

                /// Closes the block: writes the end-block id 0, aligns to 32
                /// bits and patches the length placeholder with the number of
                /// words that follow it.
                pub fn end(self: *Self) Error!void {
                    try self.bitcode.writeBits(0, abbrev_len);
                    try self.bitcode.alignTo32();

                    // Set the number of words in the block at the start of the block
                    self.bitcode.buffer.items[self.start] = @truncate(self.bitcode.length() - self.start - 1);
                }

                /// Writes an unabbreviated record (id 3): the code, the
                /// operand count and each operand, all as 6-bit VBR fields.
                pub fn writeUnabbrev(self: *Self, code: u32, values: []const u64) Error!void {
                    try self.bitcode.writeBits(3, abbrev_len);
                    try self.bitcode.writeVBR(code, 6);
                    try self.bitcode.writeVBR(values.len, 6);
                    for (values) |val| {
                        try self.bitcode.writeVBR(val, 6);
                    }
                }

                /// Writes an abbreviated record, emitting the fields of
                /// `params` unchanged.
                pub fn writeAbbrev(self: *Self, params: anytype) Error!void {
                    return self.writeAbbrevAdapted(params, struct {
                        pub fn get(_: @This(), param: anytype, comptime _: []const u8) @TypeOf(param) {
                            return param;
                        }
                    }{});
                }

                /// Returns the id assigned to `Abbrev`: its position in the
                /// block's abbreviation list plus 4. Compile error when
                /// `Abbrev` is not one of the block's abbreviations.
                pub fn abbrevId(comptime Abbrev: type) u32 {
                    inline for (block_abbrevs, 0..) |abbrev, i| {
                        if (Abbrev == abbrev) return i + 4;
                    }

                    @compileError("Unknown abbrev: " ++ @typeName(Abbrev));
                }

                /// Writes an abbreviated record. Each field value (or array
                /// element) is passed through `adapter.get(value, field_name)`
                /// before being encoded; blob bytes and array lengths are
                /// taken from the field directly.
                pub fn writeAbbrevAdapted(
                    self: *Self,
                    params: anytype,
                    adapter: anytype,
                ) Error!void {
                    const Abbrev = @TypeOf(params);

                    try self.bitcode.writeBits(comptime abbrevId(Abbrev), abbrev_len);

                    const fields = std.meta.fields(Abbrev);

                    // This abbreviation might only contain literals
                    if (fields.len == 0) return;

                    // Literal ops carry no field, so ops and fields are
                    // walked with separate indices.
                    comptime var field_index: usize = 0;
                    inline for (Abbrev.ops) |ty| {
                        const field_name = fields[field_index].name;
                        const param = @field(params, field_name);

                        switch (ty) {
                            // Already part of the definition; nothing is emitted per record.
                            .literal => continue,
                            .fixed => |len| try self.bitcode.writeBits(adapter.get(param, field_name), len),
                            .fixed_runtime => |width_ty| try self.bitcode.writeBits(
                                adapter.get(param, field_name),
                                self.bitcode.getTypeWidth(width_ty),
                            ),
                            .vbr => |len| try self.bitcode.writeVBR(adapter.get(param, field_name), len),
                            .char6 => try self.bitcode.write6BitChar(adapter.get(param, field_name)),
                            .blob => {
                                // Length, then the bytes aligned to 32 bits on both sides.
                                try self.bitcode.writeVBR(param.len, 6);
                                try self.bitcode.alignTo32();
                                for (param) |x| {
                                    try self.bitcode.writeBits(x, 8);
                                }
                                try self.bitcode.alignTo32();
                            },
                            .array_fixed => |len| {
                                try self.bitcode.writeVBR(param.len, 6);
                                for (param) |x| {
                                    try self.bitcode.writeBits(adapter.get(x, field_name), len);
                                }
                            },
                            .array_fixed_runtime => |width_ty| {
                                try self.bitcode.writeVBR(param.len, 6);
                                for (param) |x| {
                                    try self.bitcode.writeBits(
                                        adapter.get(x, field_name),
                                        self.bitcode.getTypeWidth(width_ty),
                                    );
                                }
                            },
                            .array_vbr => |len| {
                                try self.bitcode.writeVBR(param.len, 6);
                                for (param) |x| {
                                    try self.bitcode.writeVBR(adapter.get(x, field_name), len);
                                }
                            },
                            .array_char6 => {
                                try self.bitcode.writeVBR(param.len, 6);
                                for (param) |x| {
                                    try self.bitcode.write6BitChar(adapter.get(x, field_name));
                                }
                            },
                        }
                        field_index += 1;
                        // Any ops left after the last field can only be literals.
                        if (field_index == fields.len) break;
                    }
                }

                /// Emits an abbreviation definition (id 2) for `ops`: the
                /// operand count followed by each operand's encoding.
                fn defineAbbrev(bitcode: *BcWriter, comptime ops: []const AbbrevOp) Error!void {
                    try bitcode.writeBits(2, abbrev_len);

                    // ops.len is not accurate because arrays are actually two ops
                    try bitcode.writeVBR(blk: {
                        var count: usize = 0;
                        inline for (ops) |op| {
                            count += switch (op) {
                                .literal, .fixed, .fixed_runtime, .vbr, .char6, .blob => 1,
                                .array_fixed, .array_fixed_runtime, .array_vbr, .array_char6 => 2,
                            };
                        }
                        break :blk count;
                    }, 5);

                    // Each operand starts with a 1-bit literal flag; non-literal
                    // operands follow with a 3-bit encoding id and, for fixed
                    // and VBR, the width.
                    inline for (ops) |op| {
                        switch (op) {
                            .literal => |value| {
                                try bitcode.writeBits(1, 1);
                                try bitcode.writeVBR(value, 8);
                            },
                            .fixed => |width| {
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(1, 3);
                                try bitcode.writeVBR(width, 5);
                            },
                            .fixed_runtime => |width_ty| {
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(1, 3);
                                try bitcode.writeVBR(bitcode.getTypeWidth(width_ty), 5);
                            },
                            .vbr => |width| {
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(2, 3);
                                try bitcode.writeVBR(width, 5);
                            },
                            .char6 => {
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(4, 3);
                            },
                            .blob => {
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(5, 3);
                            },
                            .array_fixed => |width| {
                                // Array op
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(3, 3);

                                // Fixed or VBR op
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(1, 3);
                                try bitcode.writeVBR(width, 5);
                            },
                            .array_fixed_runtime => |width_ty| {
                                // Array op
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(3, 3);

                                // Fixed or VBR op
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(1, 3);
                                try bitcode.writeVBR(bitcode.getTypeWidth(width_ty), 5);
                            },
                            .array_vbr => |width| {
                                // Array op
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(3, 3);

                                // Fixed or VBR op
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(2, 3);
                                try bitcode.writeVBR(width, 5);
                            },
                            .array_char6 => {
                                // Array op
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(3, 3);

                                // Char6 op
                                try bitcode.writeBits(0, 1);
                                try bitcode.writeBits(4, 3);
                            },
                        }
                    }
                }
            };
        }
    };
}

/// Maps a byte from `[a-zA-Z0-9._]` to its 6-bit code:
/// 'a'..'z' -> 0..25, 'A'..'Z' -> 26..51, '0'..'9' -> 52..61, '.' -> 62, '_' -> 63.
/// Panics on any other byte.
fn charTo6Bit(c: u8) u8 {
    return switch (c) {
        'a'...'z' => c - 'a',
        'A'...'Z' => c - 'A' + 26,
        '0'...'9' => c - '0' + 52,
        '.' => 62,
        '_' => 63,
        else => @panic("Failed to encode byte as 6-bit char"),
    };
}

/// Returns the unsigned integer type used to hold a value of type `T` while it
/// is being written: as wide as `T`'s representation, but at least `min_len`
/// bits. Supports comptime integers (treated as u32), unsigned integers,
/// enums, bools and packed structs; anything else is a compile error.
fn BufType(comptime T: type, comptime min_len: usize) type {
    return std.meta.Int(.unsigned, @max(min_len, @bitSizeOf(switch (@typeInfo(T)) {
        .ComptimeInt => u32,
        .Int => |info| if (info.signedness == .unsigned)
            T
        else
            @compileError("Unsupported type: " ++ @typeName(T)),
        .Enum => |info| info.tag_type,
        .Bool => u1,
        .Struct => |info| switch (info.layout) {
            .Auto, .Extern => @compileError("Unsupported type: " ++ @typeName(T)),
            .Packed => std.meta.Int(.unsigned, @bitSizeOf(T)),
        },
        else => @compileError("Unsupported type: " ++ @typeName(T)),
    })));
}

/// Converts `value` to its unsigned integer representation in
/// `BufType(@TypeOf(value), min_len)`.
fn bufValue(value: anytype, comptime min_len: usize) BufType(@TypeOf(value), min_len) {
    return switch (@typeInfo(@TypeOf(value))) {
        .ComptimeInt, .Int => @intCast(value),
        .Enum => @intFromEnum(value),
        .Bool => @intFromBool(value),
        .Struct => @intCast(@as(std.meta.Int(.unsigned, @bitSizeOf(@TypeOf(value))), @bitCast(value))),
        else => unreachable,
    };
}