const Wasm = @This();

const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const fs = std.fs;
const leb = std.debug.leb;

const Module = @import("../Module.zig");
const codegen = @import("../codegen/wasm.zig");
const link = @import("../link.zig");

/// Various magic numbers defined by the wasm spec
const spec = struct {
    const magic = [_]u8{ 0x00, 0x61, 0x73, 0x6D }; // \0asm
    const version = [_]u8{ 0x01, 0x00, 0x00, 0x00 }; // version 1

    const custom_id = 0;
    const types_id = 1;
    const imports_id = 2;
    const funcs_id = 3;
    const tables_id = 4;
    const memories_id = 5;
    const globals_id = 6;
    const exports_id = 7;
    const start_id = 8;
    const elements_id = 9;
    const code_id = 10;
    const data_id = 11;
};

pub const base_tag = link.File.Tag.wasm;

pub const FnData = struct {
    funcidx: u32,
};

base: link.File,

types: Types,
funcs: Funcs,
exports: Exports,

/// Array over the section structs used in the various sections above to
/// allow iteration when shifting sections to make space.
/// TODO: this should eventually be size 11 when we use all the sections.
sections: [4]*Section,

pub fn openPath(allocator: *Allocator, dir: fs.Dir, sub_path: []const u8, options: link.Options) !*link.File {
    assert(options.object_format == .wasm);

    // TODO: read the file and keep valid parts instead of truncating
    const file = try dir.createFile(sub_path, .{ .truncate = true, .read = true });
    errdefer file.close();

    const wasm = try allocator.create(Wasm);
    errdefer allocator.destroy(wasm);

    try file.writeAll(&(spec.magic ++ spec.version));

    // TODO: this should vary depending on the section and be less arbitrary
    const size = 1024;
    const offset = @sizeOf(@TypeOf(spec.magic ++ spec.version));

    wasm.* = .{
        .base = .{
            .tag = .wasm,
            .options = options,
            .file = file,
            .allocator = allocator,
        },
        .types = try Types.init(file, offset, size),
        .funcs = try Funcs.init(file, offset + size, size, offset + 3 * size, size),
        .exports = try Exports.init(file, offset + 2 * size, size),
        // These must be ordered as they will appear in the output file
        .sections = [_]*Section{
            &wasm.types.typesec.section,
            &wasm.funcs.funcsec,
            &wasm.exports.exportsec,
            &wasm.funcs.codesec.section,
        },
    };

    try file.setEndPos(offset + 4 * size);

    return &wasm.base;
}

pub fn deinit(self: *Wasm) void {
    self.types.deinit();
    self.funcs.deinit();
}

pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
    if (decl.typed_value.most_recent.typed_value.ty.zigTypeTag() != .Fn)
        return error.TODOImplementNonFnDeclsForWasm;

    if (decl.fn_link.wasm) |fn_data| {
        self.funcs.free(fn_data.funcidx);
    }

    var buf = std.ArrayList(u8).init(self.base.allocator);
    defer buf.deinit();

    try codegen.genFunctype(&buf, decl);
    const typeidx = try self.types.new(buf.items);

    buf.items.len = 0;
    try codegen.genCode(&buf, decl);
    const funcidx = try self.funcs.new(typeidx, buf.items);

    decl.fn_link.wasm = .{ .funcidx = funcidx };

    // TODO: we should be smarter and set this only when needed
    self.exports.dirty = true;
}

pub fn updateDeclExports(
    self: *Wasm,
    module: *Module,
    decl: *const Module.Decl,
    exports: []const *Module.Export,
) !void {
    self.exports.dirty = true;
}

pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
    // TODO: remove this assert when non-function Decls are implemented
    assert(decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn);
    if (decl.fn_link.wasm) |fn_data| {
        self.funcs.free(fn_data.funcidx);
        decl.fn_link.wasm = null;
    }
}

pub fn flush(self: *Wasm, module: *Module) !void {
    if (self.exports.dirty) try self.exports.writeAll(module);
}
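// For illustration only (these numbers come from the arbitrary placeholder
// `size = 1024` in openPath() above, not from the wasm spec), a freshly
// created output file is laid out as:
//
//   offset 0:    \0asm magic + version        (8 bytes)
//   offset 8:    type section slot            (1024 bytes)
//   offset 1032: function section slot        (1024 bytes)
//   offset 2056: export section slot          (1024 bytes)
//   offset 3080: code section slot            (1024 bytes)
//   offset 4104: end of file (setEndPos)
//
// Each slot holds the real section followed by an empty named custom section
// that soaks up the unused padding bytes.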
/// This struct describes the location of a named section + custom section
/// padding in the output file. This is all the data we need to allow for
/// shifting sections around when padding runs out.
const Section = struct {
    /// The size of a section header: 1 byte section id + 5 bytes
    /// for the fixed-width ULEB128 encoded contents size.
    const header_size = 1 + 5;

    /// Offset of the section id byte from the start of the file.
    offset: u64,
    /// Size of the section, including the header and directly
    /// following custom section used for padding if any.
    size: u64,

    /// Resize the usable part of the section, handling the following custom
    /// section used for padding. If there is not enough padding left, shift
    /// all following sections to make space. Takes the current and target
    /// contents sizes of the section as arguments.
    fn resize(self: *Section, file: fs.File, current: u32, target: u32) !void {
        // Section header + target contents size + custom section header
        // + custom section name + empty custom section > owned chunk of the file
        if (header_size + target + header_size + 1 + 0 > self.size)
            return error.TODOImplementSectionShifting;

        const new_custom_start = self.offset + header_size + target;
        const new_custom_contents_size = self.size - target - 2 * header_size;
        assert(new_custom_contents_size >= 1);

        // +1 for the name of the custom section, which we set to an empty string
        var custom_header: [header_size + 1]u8 = undefined;
        custom_header[0] = spec.custom_id;
        leb.writeUnsignedFixed(5, custom_header[1..header_size], @intCast(u32, new_custom_contents_size));
        custom_header[header_size] = 0;

        try file.pwriteAll(&custom_header, new_custom_start);
    }
};
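// A sketch of the chunk of the file owned by a single Section, as written by
// VecSection.init() below and maintained by resize() above:
//
//   [ id (1) | contents size (5-byte fixed LEB128) | contents ... ]
//   [ custom id (1) | padding size (5-byte fixed LEB128) | "" name (1) | padding ... ]
//
// Growing the contents moves the custom section header to the right and
// shrinks the padding; once the padding is exhausted the following sections
// would have to be shifted, which is not implemented yet.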
/// This can be used to manage the contents of any section which uses a vector
/// of contents. This interface maintains index stability while allowing for
/// reuse of "dead" indexes.
const VecSection = struct {
    /// Represents a single entry in the vector (e.g. a type in the type section)
    const Entry = struct {
        /// Offset from the start of the section contents in bytes
        offset: u32,
        /// Size in bytes of the entry
        size: u32,
    };

    section: Section,

    /// Size in bytes of the contents of the section. Does not include
    /// the "header" containing the section id and this value.
    contents_size: u32,

    /// List of all entries in the contents of the section.
    entries: std.ArrayListUnmanaged(Entry) = std.ArrayListUnmanaged(Entry){},
    /// List of indexes of unreferenced entries which may be
    /// overwritten and reused.
    dead_list: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){},

    /// Write the headers of the section and custom padding section
    fn init(comptime section_id: u8, file: fs.File, offset: u64, initial_size: u64) !VecSection {
        // section id, section size, empty vector, custom section id,
        // custom section size, empty custom section name
        var initial_data: [1 + 5 + 5 + 1 + 5 + 1]u8 = undefined;
        assert(initial_size >= initial_data.len);

        comptime var i = 0;
        initial_data[i] = section_id;
        i += 1;
        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], 5);
        i += 5;
        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], 0);
        i += 5;
        initial_data[i] = spec.custom_id;
        i += 1;
        leb.writeUnsignedFixed(5, initial_data[i..(i + 5)], @intCast(u32, initial_size - @sizeOf(@TypeOf(initial_data))));
        i += 5;
        initial_data[i] = 0;

        try file.pwriteAll(&initial_data, offset);

        return VecSection{
            .section = .{
                .offset = offset,
                .size = initial_size,
            },
            .contents_size = 5,
        };
    }

    fn deinit(self: *VecSection, allocator: *Allocator) void {
        self.entries.deinit(allocator);
        self.dead_list.deinit(allocator);
    }

    /// Write a new entry into the file, returning the index used.
    fn addEntry(self: *VecSection, file: fs.File, allocator: *Allocator, data: []const u8) !u32 {
        // First look for a dead entry we can reuse
        for (self.dead_list.items) |dead_idx, i| {
            const dead_entry = &self.entries.items[dead_idx];
            if (dead_entry.size == data.len) {
                // Found a dead entry of the right length, overwrite it
                try file.pwriteAll(data, self.section.offset + Section.header_size + dead_entry.offset);
                _ = self.dead_list.swapRemove(i);
                return dead_idx;
            }
        }

        // TODO: We can be more efficient if we special-case one or
        // more consecutive dead entries at the end of the vector.

        // We failed to find a dead entry to reuse, so write the new
        // entry to the end of the section.
        try self.section.resize(file, self.contents_size, self.contents_size + @intCast(u32, data.len));
        try file.pwriteAll(data, self.section.offset + Section.header_size + self.contents_size);
        try self.entries.append(allocator, .{
            .offset = self.contents_size,
            .size = @intCast(u32, data.len),
        });
        self.contents_size += @intCast(u32, data.len);

        // Make sure the dead list always has enough space to store all freed
        // entries. This makes it so that delEntry() cannot fail.
        // TODO: figure out a better way that doesn't waste as much memory
        try self.dead_list.ensureCapacity(allocator, self.entries.items.len);

        // Update the size in the section header and the item count of
        // the contents vector.
        var size_and_count: [10]u8 = undefined;
        leb.writeUnsignedFixed(5, size_and_count[0..5], self.contents_size);
        leb.writeUnsignedFixed(5, size_and_count[5..], @intCast(u32, self.entries.items.len));
        try file.pwriteAll(&size_and_count, self.section.offset + 1);

        return @intCast(u32, self.entries.items.len - 1);
    }

    /// Mark the entry referenced by the given index as dead.
    fn delEntry(self: *VecSection, index: u32) void {
        self.dead_list.appendAssumeCapacity(index);
    }
};
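// A hypothetical usage sketch of the VecSection interface above (the names
// `vec`, `file`, `allocator` and `data_*` are illustrative and do not appear
// elsewhere in this file): freeing an entry and later adding one of the same
// byte length reuses the dead index instead of growing the section.
//
//   const a = try vec.addEntry(file, allocator, data_a); // appended, index 0
//   const b = try vec.addEntry(file, allocator, data_b); // appended, index 1
//   vec.delEntry(a); // index 0 is now on the dead list
//   // if data_c.len == data_a.len, the old bytes are overwritten and 0 is returned
//   const c = try vec.addEntry(file, allocator, data_c);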
const Types = struct {
    typesec: VecSection,

    fn init(file: fs.File, offset: u64, initial_size: u64) !Types {
        return Types{ .typesec = try VecSection.init(spec.types_id, file, offset, initial_size) };
    }

    fn deinit(self: *Types) void {
        const wasm = @fieldParentPtr(Wasm, "types", self);
        self.typesec.deinit(wasm.base.allocator);
    }

    fn new(self: *Types, data: []const u8) !u32 {
        const wasm = @fieldParentPtr(Wasm, "types", self);
        return self.typesec.addEntry(wasm.base.file.?, wasm.base.allocator, data);
    }

    fn free(self: *Types, typeidx: u32) void {
        self.typesec.delEntry(typeidx);
    }
};

const Funcs = struct {
    /// This section needs special handling to keep the indexes matching with
    /// the codesec, so we can't just use a VecSection.
    funcsec: Section,
    /// The typeidx stored for each function, indexed by funcidx.
    func_types: std.ArrayListUnmanaged(u32) = std.ArrayListUnmanaged(u32){},
    codesec: VecSection,

    fn init(file: fs.File, funcs_offset: u64, funcs_size: u64, code_offset: u64, code_size: u64) !Funcs {
        return Funcs{
            .funcsec = (try VecSection.init(spec.funcs_id, file, funcs_offset, funcs_size)).section,
            .codesec = try VecSection.init(spec.code_id, file, code_offset, code_size),
        };
    }

    fn deinit(self: *Funcs) void {
        const wasm = @fieldParentPtr(Wasm, "funcs", self);
        self.func_types.deinit(wasm.base.allocator);
        self.codesec.deinit(wasm.base.allocator);
    }

    /// Add a new function to the binary, first finding space for and writing
    /// the code then writing the typeidx to the corresponding index in the
    /// funcsec. Returns the function index used.
    fn new(self: *Funcs, typeidx: u32, code: []const u8) !u32 {
        const wasm = @fieldParentPtr(Wasm, "funcs", self);
        const file = wasm.base.file.?;
        const allocator = wasm.base.allocator;

        assert(self.func_types.items.len == self.codesec.entries.items.len);

        // TODO: consider nop-padding the code if there is a close but not perfect fit
        const funcidx = try self.codesec.addEntry(file, allocator, code);

        if (self.func_types.items.len < self.codesec.entries.items.len) {
            // u32 vector length + funcs_count u32s in the vector
            const current = 5 + @intCast(u32, self.func_types.items.len) * 5;
            try self.funcsec.resize(file, current, current + 5);
            try self.func_types.append(allocator, typeidx);

            // Update the size in the section header and the item count of
            // the contents vector.
            const count = @intCast(u32, self.func_types.items.len);
            var size_and_count: [10]u8 = undefined;
            leb.writeUnsignedFixed(5, size_and_count[0..5], 5 + count * 5);
            leb.writeUnsignedFixed(5, size_and_count[5..], count);
            try file.pwriteAll(&size_and_count, self.funcsec.offset + 1);
        } else {
            // We are overwriting a dead function and may now free the type,
            // then remember the new typeidx for this funcidx.
            wasm.types.free(self.func_types.items[funcidx]);
            self.func_types.items[funcidx] = typeidx;
        }
        assert(self.func_types.items.len == self.codesec.entries.items.len);

        var typeidx_leb: [5]u8 = undefined;
        leb.writeUnsignedFixed(5, &typeidx_leb, typeidx);
        try file.pwriteAll(&typeidx_leb, self.funcsec.offset + Section.header_size + 5 + funcidx * 5);

        return funcidx;
    }

    fn free(self: *Funcs, funcidx: u32) void {
        self.codesec.delEntry(funcidx);
    }
};
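// For reference, the function section contents maintained by Funcs.new() above
// are a 5-byte fixed LEB128 entry count followed by one 5-byte fixed LEB128
// typeidx per function, so the entry for a given funcidx lives at
// funcsec.offset + Section.header_size + 5 + funcidx * 5.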
/// Exports are tricky. We can't leave dead entries in the binary as they
/// would obviously be visible from the execution environment. The simplest
/// way to work around this is to re-emit the export section whenever
/// something changes. This also makes it easier to ensure exported function
/// and global indexes are updated as they change.
const Exports = struct {
    exportsec: Section,

    /// Size in bytes of the contents of the section. Does not include
    /// the "header" containing the section id and this value.
    contents_size: u32,

    /// If this is true, then exports will be rewritten on flush()
    dirty: bool,

    fn init(file: fs.File, offset: u64, initial_size: u64) !Exports {
        return Exports{
            .exportsec = (try VecSection.init(spec.exports_id, file, offset, initial_size)).section,
            .contents_size = 5,
            .dirty = false,
        };
    }

    fn writeAll(self: *Exports, module: *Module) !void {
        const wasm = @fieldParentPtr(Wasm, "exports", self);
        const file = wasm.base.file.?;
        var buf: [5]u8 = undefined;

        // First ensure the section is the right size
        var export_count: u32 = 0;
        var new_contents_size: u32 = 5;
        for (module.decl_exports.entries.items) |entry| {
            for (entry.value) |e| {
                export_count += 1;
                new_contents_size += calcSize(e);
            }
        }
        if (new_contents_size != self.contents_size) {
            try self.exportsec.resize(file, self.contents_size, new_contents_size);
            leb.writeUnsignedFixed(5, &buf, new_contents_size);
            try file.pwriteAll(&buf, self.exportsec.offset + 1);
            // Remember the new size so the next writeAll() can skip resizing.
            self.contents_size = new_contents_size;
        }

        try file.seekTo(self.exportsec.offset + Section.header_size);
        const writer = file.writer();

        // Length of the exports vec
        leb.writeUnsignedFixed(5, &buf, export_count);
        try writer.writeAll(&buf);

        for (module.decl_exports.entries.items) |entry|
            for (entry.value) |e|
                try writeExport(writer, e);

        self.dirty = false;
    }

    /// Return the total number of bytes an export will take.
    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
    /// be replaced with proper variable-length LEB128 as it is inefficient.
    fn calcSize(e: *Module.Export) u32 {
        // LEB128 name length + name bytes + export type + LEB128 index
        return 5 + @intCast(u32, e.options.name.len) + 1 + 5;
    }

    /// Write the data for a single export to the given writer.
    /// TODO: fixed-width LEB128 is currently used for simplicity, but should
    /// be replaced with proper variable-length LEB128 as it is inefficient.
    fn writeExport(writer: anytype, e: *Module.Export) !void {
        var buf: [5]u8 = undefined;

        // Export name length + name
        leb.writeUnsignedFixed(5, &buf, @intCast(u32, e.options.name.len));
        try writer.writeAll(&buf);
        try writer.writeAll(e.options.name);

        switch (e.exported_decl.typed_value.most_recent.typed_value.ty.zigTypeTag()) {
            .Fn => {
                // Type of the export
                try writer.writeByte(0x00);
                // Exported function index
                leb.writeUnsignedFixed(5, &buf, e.exported_decl.fn_link.wasm.?.funcidx);
                try writer.writeAll(&buf);
            },
            else => return error.TODOImplementNonFnDeclsForWasm,
        }
    }
};
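// For illustration, a single function export as emitted by Exports.writeExport()
// above (fixed-width LEB128 throughout):
//
//   [ name length (5-byte LEB128) | name bytes | 0x00 (function kind) | funcidx (5-byte LEB128) ]
//
// which matches the 5 + name.len + 1 + 5 bytes accounted for by calcSize().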