const Wasm = @This();
const std = @import("std");
const assert = std.debug.assert;
const build_options = @import("build_options");
const builtin = @import("builtin");
const codegen = @import("../codegen.zig");
const fs = std.fs;
const leb = std.leb;
const link = @import("../link.zig");
const lldMain = @import("../main.zig").lldMain;
const log = std.log.scoped(.link);
const gc_log = std.log.scoped(.gc);
const mem = std.mem;
const trace = @import("../tracy.zig").trace;
const types = @import("Wasm/types.zig");
const wasi_libc = @import("../wasi_libc.zig");
const Air = @import("../Air.zig");
const Allocator = std.mem.Allocator;
const Archive = @import("Wasm/Archive.zig");
const Cache = std.Build.Cache;
const CodeGen = @import("../arch/wasm/CodeGen.zig");
const Compilation = @import("../Compilation.zig");
const Dwarf = @import("Dwarf.zig");
const File = @import("Wasm/file.zig").File;
const InternPool = @import("../InternPool.zig");
const Liveness = @import("../Liveness.zig");
const LlvmObject = @import("../codegen/llvm.zig").Object;
const Module = @import("../Module.zig");
const Object = @import("Wasm/Object.zig");
const Symbol = @import("Wasm/Symbol.zig");
const Type = @import("../type.zig").Type;
const TypedValue = @import("../TypedValue.zig");
const ZigObject = @import("Wasm/ZigObject.zig");

pub const Atom = @import("Wasm/Atom.zig");
pub const Relocation = types.Relocation;
pub const base_tag: link.File.Tag = .wasm;

// This file is itself a struct (`const Wasm = @This()`): the fields below
// make up the complete state of the Wasm linker.

base: link.File,
/// Symbol name of the entry function to export
entry_name: ?[]const u8,
/// When true, will allow undefined symbols
import_symbols: bool,
/// List of *global* symbol names to export to the host environment.
export_symbol_names: []const []const u8,
/// When defined, sets the start of the data section.
global_base: ?u64,
/// When defined, sets the initial memory size of the memory.
initial_memory: ?u64,
/// When defined, sets the maximum memory size of the memory.
max_memory: ?u64,
/// When true, will import the function table from the host environment.
import_table: bool,
/// When true, will export the function table to the host environment.
export_table: bool,
/// Output name of the file
name: []const u8,
/// If this is not null, an object file is created by LLVM and linked with LLD afterwards.
llvm_object: ?*LlvmObject = null,
/// The file index of a `ZigObject`. This will only contain a valid index when a zcu exists,
/// and the chosen backend is the Wasm backend.
zig_object_index: File.Index = .null,
/// List of relocatable files to be linked into the final binary.
files: std.MultiArrayList(File.Entry) = .{},
/// When importing objects from the host environment, a name must be supplied.
/// LLVM uses "env" by default when none is given. This would be a good default for Zig
/// to support existing code.
/// TODO: Allow setting this through a flag?
host_name: []const u8 = "env",
/// List of symbols generated by the linker.
synthetic_symbols: std.ArrayListUnmanaged(Symbol) = .{},
/// Maps atoms to their segment index
atoms: std.AutoHashMapUnmanaged(u32, Atom.Index) = .{},
/// List of all atoms.
managed_atoms: std.ArrayListUnmanaged(Atom) = .{},
/// Represents the index into `segments` where the 'code' section
/// lives.
code_section_index: ?u32 = null,
/// The index of the segment representing the custom '.debug_info' section.
debug_info_index: ?u32 = null,
/// The index of the segment representing the custom '.debug_line' section.
debug_line_index: ?u32 = null,
/// The index of the segment representing the custom '.debug_loc' section.
debug_loc_index: ?u32 = null,
/// The index of the segment representing the custom '.debug_ranges' section.
debug_ranges_index: ?u32 = null,
/// The index of the segment representing the custom '.debug_pubnames' section.
debug_pubnames_index: ?u32 = null,
/// The index of the segment representing the custom '.debug_pubtypes' section.
debug_pubtypes_index: ?u32 = null,
/// The index of the segment representing the custom '.debug_str' section.
debug_str_index: ?u32 = null,
/// The index of the segment representing the custom '.debug_abbrev' section.
debug_abbrev_index: ?u32 = null,
/// The count of imported functions. This number will be appended
/// to the function indexes as their index starts at the lowest non-extern function.
imported_functions_count: u32 = 0,
/// The count of imported wasm globals. This number will be appended
/// to the global indexes when sections are merged.
imported_globals_count: u32 = 0,
/// The count of imported tables. This number will be appended
/// to the table indexes when sections are merged.
imported_tables_count: u32 = 0,
/// Map of symbol locations, represented by its `types.Import`
imports: std.AutoHashMapUnmanaged(SymbolLoc, types.Import) = .{},
/// Represents non-synthetic section entries.
/// Used for code, data and custom sections.
segments: std.ArrayListUnmanaged(Segment) = .{},
/// Maps a data segment key (such as .rodata) to the index into `segments`.
data_segments: std.StringArrayHashMapUnmanaged(u32) = .{},
/// A table of `types.Segment` which provide meta data
/// about a data symbol such as its name where the key is
/// the segment index, which can be found from `data_segments`
segment_info: std.AutoArrayHashMapUnmanaged(u32, types.Segment) = .{},
/// Deduplicated string table for strings used by symbols, imports and exports.
string_table: StringTable = .{},

// Output sections

/// Output type section
func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{},
/// Output function section where the key is the original
/// function index and the value is function.
/// This allows us to map multiple symbols to the same function.
functions: std.AutoArrayHashMapUnmanaged(
    struct { file: File.Index, index: u32 },
    struct { func: std.wasm.Func, sym_index: Symbol.Index },
) = .{},
/// Output global section
wasm_globals: std.ArrayListUnmanaged(std.wasm.Global) = .{},
/// Memory section
memories: std.wasm.Memory = .{ .limits = .{
    .min = 0,
    .max = undefined,
    .flags = 0,
} },
/// Output table section
tables: std.ArrayListUnmanaged(std.wasm.Table) = .{},
/// Output export section
exports: std.ArrayListUnmanaged(types.Export) = .{},
/// List of initialization functions. These must be called in order of priority
/// by the (synthetic) __wasm_call_ctors function.
init_funcs: std.ArrayListUnmanaged(InitFuncLoc) = .{},
/// Index to a function defining the entry of the wasm file
entry: ?u32 = null,
/// Indirect function table, used to call function pointers
/// When this is non-zero, we must emit a table entry,
/// as well as an 'elements' section.
///
/// Note: Key is symbol location, value represents the index into the table
function_table: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .{},
/// All object files and their data which are linked into the final binary
objects: std.ArrayListUnmanaged(File.Index) = .{},
/// All archive files that are lazy loaded.
/// e.g. when an undefined symbol references a symbol from the archive.
archives: std.ArrayListUnmanaged(Archive) = .{},
/// A map of global names (read: offset into string table) to their symbol location
globals: std.AutoHashMapUnmanaged(u32, SymbolLoc) = .{},
/// The list of GOT symbols and their location
got_symbols: std.ArrayListUnmanaged(SymbolLoc) = .{},
/// Maps discarded symbols and their positions to the location of the symbol
/// it was resolved to
discarded: std.AutoHashMapUnmanaged(SymbolLoc, SymbolLoc) = .{},
/// List of all symbol locations which have been resolved by the linker and will be emitted
/// into the final binary.
resolved_symbols: std.AutoArrayHashMapUnmanaged(SymbolLoc, void) = .{},
/// Symbols that remain undefined after symbol resolution.
/// Note: The key represents an offset into the string table, rather than the actual string.
undefs: std.AutoArrayHashMapUnmanaged(u32, SymbolLoc) = .{},
/// Maps a symbol's location to an atom. This can be used to find meta
/// data of a symbol, such as its size, or its offset to perform a relocation.
/// Undefined (and synthetic) symbols do not have an Atom and therefore cannot be mapped.
symbol_atom: std.AutoHashMapUnmanaged(SymbolLoc, Atom.Index) = .{},

pub const Alignment = types.Alignment;

/// Metadata for one section/data segment in the output binary.
pub const Segment = struct {
    alignment: Alignment,
    size: u32,
    offset: u32,
    /// Bit set of `Flag` values describing this segment.
    flags: u32,

    pub const Flag = enum(u32) {
        WASM_DATA_SEGMENT_IS_PASSIVE = 0x01,
        WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02,
    };

    /// Returns true when the passive flag is set on this segment.
    pub fn isPassive(segment: Segment) bool {
        return segment.flags & @intFromEnum(Flag.WASM_DATA_SEGMENT_IS_PASSIVE) != 0;
    }

    /// For a given segment, determines if it needs passive initialization
    /// (i.e. must be initialized at runtime rather than by the host).
    /// When memory is imported, every segment except `.bss` needs it.
    fn needsPassiveInitialization(segment: Segment, import_mem: bool, name: []const u8) bool {
        if (import_mem and !std.mem.eql(u8, name, ".bss")) {
            return true;
        }
        return segment.isPassive();
    }
};

pub const SymbolLoc = struct {
    /// The index of the symbol within the specified file
    index: Symbol.Index,
    /// The index of the object file where the symbol resides.
    file: File.Index,

    /// From a given location, returns the corresponding symbol in the wasm binary
    pub fn getSymbol(loc: SymbolLoc, wasm_file: *const Wasm) *Symbol {
        // Follow the replacement chain: a discarded symbol resolves to the
        // symbol that replaced it.
        if (wasm_file.discarded.get(loc)) |new_loc| {
            return new_loc.getSymbol(wasm_file);
        }
        if (wasm_file.file(loc.file)) |obj_file| {
            return obj_file.symbol(loc.index);
        }
        // A `.null` file index denotes a linker-synthetic symbol.
        return &wasm_file.synthetic_symbols.items[@intFromEnum(loc.index)];
    }

    /// From a given location, returns the name of the symbol.
    pub fn getName(loc: SymbolLoc, wasm_file: *const Wasm) []const u8 {
        if (wasm_file.discarded.get(loc)) |new_loc| {
            return new_loc.getName(wasm_file);
        }
        if (wasm_file.file(loc.file)) |obj_file| {
            return obj_file.symbolName(loc.index);
        }
        const sym = wasm_file.synthetic_symbols.items[@intFromEnum(loc.index)];
        return wasm_file.string_table.get(sym.name);
    }

    /// From a given symbol location, returns the final location.
    /// e.g. when a symbol was resolved and replaced by the symbol
    /// in a different file, this will return said location.
    /// If the symbol wasn't replaced by another, this will return
    /// the given location itself.
    pub fn finalLoc(loc: SymbolLoc, wasm_file: *const Wasm) SymbolLoc {
        if (wasm_file.discarded.get(loc)) |new_loc| {
            return new_loc.finalLoc(wasm_file);
        }
        return loc;
    }
};

/// Contains the location of the function symbol, as well as
/// the priority itself of the initialization function.
pub const InitFuncLoc = struct {
    /// object file index in the list of objects.
    /// Unlike `SymbolLoc` this cannot be `null` as we never define
    /// our own ctors.
    file: File.Index,
    /// Symbol index within the corresponding object file.
    index: Symbol.Index,
    /// The priority in which the constructor must be called.
    priority: u32,

    /// From a given `InitFuncLoc` returns the corresponding function symbol
    fn getSymbol(loc: InitFuncLoc, wasm: *const Wasm) *Symbol {
        return getSymbolLoc(loc).getSymbol(wasm);
    }

    /// Turns the given `InitFuncLoc` into a `SymbolLoc`
    fn getSymbolLoc(loc: InitFuncLoc) SymbolLoc {
        return .{ .file = loc.file, .index = loc.index };
    }

    /// Returns true when `lhs` has a higher priority (i.e. value closer to 0) than `rhs`.
    fn lessThan(ctx: void, lhs: InitFuncLoc, rhs: InitFuncLoc) bool {
        _ = ctx;
        return lhs.priority < rhs.priority;
    }
};

/// Generic string table that duplicates strings
/// and converts them into offsets instead.
pub const StringTable = struct {
    /// Table that maps string offsets, which is used to de-duplicate strings.
    /// Rather than having the offset map to the data, the `StringContext` holds all bytes of the string.
    /// The strings are stored as a contiguous array where each string is zero-terminated.
    string_table: std.HashMapUnmanaged(
        u32,
        void,
        std.hash_map.StringIndexContext,
        std.hash_map.default_max_load_percentage,
    ) = .{},
    /// Holds the actual data of the string table.
    string_data: std.ArrayListUnmanaged(u8) = .{},

    /// Accepts a string and searches for a corresponding string.
    /// When found, de-duplicates the string and returns the existing offset instead.
    /// When the string is not found in the `string_table`, a new entry will be inserted
    /// and the new offset to its data will be returned.
    pub fn put(table: *StringTable, allocator: Allocator, string: []const u8) !u32 {
        const gop = try table.string_table.getOrPutContextAdapted(
            allocator,
            string,
            std.hash_map.StringIndexAdapter{ .bytes = &table.string_data },
            .{ .bytes = &table.string_data },
        );
        if (gop.found_existing) {
            const off = gop.key_ptr.*;
            log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off });
            return off;
        }
        // Reserve capacity up front so the appends below cannot fail and
        // leave `string_table` and `string_data` out of sync.
        try table.string_data.ensureUnusedCapacity(allocator, string.len + 1);
        const offset = @as(u32, @intCast(table.string_data.items.len));
        log.debug("writing new string '{s}' at offset 0x{x}", .{ string, offset });
        table.string_data.appendSliceAssumeCapacity(string);
        table.string_data.appendAssumeCapacity(0);
        gop.key_ptr.* = offset;
        return offset;
    }

    /// From a given offset, returns its corresponding string value.
    /// Asserts offset does not exceed bounds.
    pub fn get(table: StringTable, off: u32) []const u8 {
        assert(off < table.string_data.items.len);
        return mem.sliceTo(@as([*:0]const u8, @ptrCast(table.string_data.items.ptr + off)), 0);
    }

    /// Returns the offset of a given string when it exists.
    /// Will return null if the given string does not yet exist within the string table.
    pub fn getOffset(table: *StringTable, string: []const u8) ?u32 {
        return table.string_table.getKeyAdapted(
            string,
            std.hash_map.StringIndexAdapter{ .bytes = &table.string_data },
        );
    }

    /// Frees all resources of the string table. Any references pointing
    /// to the strings will be invalid.
    pub fn deinit(table: *StringTable, allocator: Allocator) void {
        table.string_data.deinit(allocator);
        table.string_table.deinit(allocator);
        table.* = undefined;
    }
};

/// Opens the Wasm linker for the given compilation. Currently just
/// delegates to `createEmpty` (no incremental state is restored).
pub fn open(
    arena: Allocator,
    comp: *Compilation,
    emit: Compilation.Emit,
    options: link.File.OpenOptions,
) !*Wasm {
    // TODO: restore saved linker state, don't truncate the file, and
    // participate in incremental compilation.
    return createEmpty(arena, comp, emit, options);
}

/// Allocates and initializes a fresh `Wasm` linker: opens the output file
/// (unless LLD will produce the final product) and creates the synthetic
/// symbols the linker always needs (stack pointer, indirect function table,
/// `__wasm_call_ctors`, and the TLS symbols under shared memory).
pub fn createEmpty(
    arena: Allocator,
    comp: *Compilation,
    emit: Compilation.Emit,
    options: link.File.OpenOptions,
) !*Wasm {
    const gpa = comp.gpa;
    const target = comp.root_mod.resolved_target.result;
    assert(target.ofmt == .wasm);

    const use_lld = build_options.have_llvm and comp.config.use_lld;
    const use_llvm = comp.config.use_llvm;
    const output_mode = comp.config.output_mode;
    const shared_memory = comp.config.shared_memory;
    const wasi_exec_model = comp.config.wasi_exec_model;

    // If using LLD to link, this code should produce an object file so that it
    // can be passed to LLD.
    // If using LLVM to generate the object file for the zig compilation unit,
    // we need a place to put the object file so that it can be subsequently
    // handled.
    const zcu_object_sub_path = if (!use_lld and !use_llvm)
        null
    else
        try std.fmt.allocPrint(arena, "{s}.o", .{emit.sub_path});

    const wasm = try arena.create(Wasm);
    wasm.* = .{
        .base = .{
            .tag = .wasm,
            .comp = comp,
            .emit = emit,
            .zcu_object_sub_path = zcu_object_sub_path,
            .gc_sections = options.gc_sections orelse (output_mode != .Obj),
            .print_gc_sections = options.print_gc_sections,
            .stack_size = options.stack_size orelse switch (target.os.tag) {
                .freestanding => 1 * 1024 * 1024, // 1 MiB
                else => 16 * 1024 * 1024, // 16 MiB
            },
            .allow_shlib_undefined = options.allow_shlib_undefined orelse false,
            .file = null,
            .disable_lld_caching = options.disable_lld_caching,
            .build_id = options.build_id,
            .rpath_list = options.rpath_list,
        },
        .name = undefined,
        .import_table = options.import_table,
        .export_table = options.export_table,
        .import_symbols = options.import_symbols,
        .export_symbol_names = options.export_symbol_names,
        .global_base = options.global_base,
        .initial_memory = options.initial_memory,
        .max_memory = options.max_memory,
        .entry_name = switch (options.entry) {
            .disabled => null,
            .default => if (output_mode != .Exe) null else defaultEntrySymbolName(wasi_exec_model),
            .enabled => defaultEntrySymbolName(wasi_exec_model),
            .named => |name| name,
        },
    };
    if (use_llvm and comp.config.have_zcu) {
        wasm.llvm_object = try LlvmObject.create(arena, comp);
    }
    errdefer wasm.base.destroy();

    if (use_lld and (use_llvm or !comp.config.have_zcu)) {
        // LLVM emits the object file (if any); LLD links it into the final product.
        return wasm;
    }

    // What path should this Wasm linker code output to?
    // If using LLD to link, this code should produce an object file so that it
    // can be passed to LLD.
    const sub_path = if (use_lld) zcu_object_sub_path.? else emit.sub_path;

    wasm.base.file = try emit.directory.handle.createFile(sub_path, .{
        .truncate = true,
        .read = true,
        // WASI executables get the executable bit set where supported.
        .mode = if (fs.has_executable_bit)
            if (target.os.tag == .wasi and output_mode == .Exe)
                fs.File.default_mode | 0b001_000_000
            else
                fs.File.default_mode
        else
            0,
    });
    wasm.name = sub_path;

    // create stack pointer symbol
    {
        const loc = try wasm.createSyntheticSymbol("__stack_pointer", .global);
        const symbol = loc.getSymbol(wasm);
        // For object files we will import the stack pointer symbol
        if (output_mode == .Obj) {
            symbol.setUndefined(true);
            symbol.index = @intCast(wasm.imported_globals_count);
            wasm.imported_globals_count += 1;
            try wasm.imports.putNoClobber(
                gpa,
                loc,
                .{
                    .module_name = try wasm.string_table.put(gpa, wasm.host_name),
                    .name = symbol.name,
                    .kind = .{ .global = .{ .valtype = .i32, .mutable = true } },
                },
            );
        } else {
            symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len);
            symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
            const global = try wasm.wasm_globals.addOne(gpa);
            global.* = .{
                .global_type = .{
                    .valtype = .i32,
                    .mutable = true,
                },
                .init = .{ .i32_const = 0 },
            };
        }
    }

    // create indirect function pointer symbol
    {
        const loc = try wasm.createSyntheticSymbol("__indirect_function_table", .table);
        const symbol = loc.getSymbol(wasm);
        const table: std.wasm.Table = .{
            .limits = .{ .flags = 0, .min = 0, .max = undefined }, // will be overwritten during `mapFunctionTable`
            .reftype = .funcref,
        };
        if (output_mode == .Obj or options.import_table) {
            symbol.setUndefined(true);
            symbol.index = @intCast(wasm.imported_tables_count);
            wasm.imported_tables_count += 1;
            try wasm.imports.put(gpa, loc, .{
                .module_name = try wasm.string_table.put(gpa, wasm.host_name),
                .name = symbol.name,
                .kind = .{ .table = table },
            });
        } else {
            symbol.index = @as(u32, @intCast(wasm.imported_tables_count + wasm.tables.items.len));
            try wasm.tables.append(gpa, table);
            if (wasm.export_table) {
                symbol.setFlag(.WASM_SYM_EXPORTED);
            } else {
                symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
            }
        }
    }

    // create __wasm_call_ctors
    {
        const loc = try wasm.createSyntheticSymbol("__wasm_call_ctors", .function);
        const symbol = loc.getSymbol(wasm);
        symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
        // we do not know the function index until after we merged all sections.
        // Therefore we set `symbol.index` and create its corresponding references
        // at the end during `initializeCallCtorsFunction`.
    }

    // shared-memory symbols for TLS support
    if (shared_memory) {
        {
            const loc = try wasm.createSyntheticSymbol("__tls_base", .global);
            const symbol = loc.getSymbol(wasm);
            symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
            symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len);
            symbol.mark();
            try wasm.wasm_globals.append(gpa, .{
                .global_type = .{ .valtype = .i32, .mutable = true },
                .init = .{ .i32_const = undefined },
            });
        }
        {
            const loc = try wasm.createSyntheticSymbol("__tls_size", .global);
            const symbol = loc.getSymbol(wasm);
            symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
            symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len);
            symbol.mark();
            try wasm.wasm_globals.append(gpa, .{
                .global_type = .{ .valtype = .i32, .mutable = false },
                .init = .{ .i32_const = undefined },
            });
        }
        {
            const loc = try wasm.createSyntheticSymbol("__tls_align", .global);
            const symbol = loc.getSymbol(wasm);
            symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
            symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len);
            symbol.mark();
            try wasm.wasm_globals.append(gpa, .{
                .global_type = .{ .valtype = .i32, .mutable = false },
                .init = .{ .i32_const = undefined },
            });
        }
        {
            const loc = try wasm.createSyntheticSymbol("__wasm_init_tls", .function);
            const symbol = loc.getSymbol(wasm);
            symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
        }
    }

    // When compiling Zig code with the self-hosted backend, register the
    // `ZigObject` that will hold the Zig compilation unit's output.
    if (comp.module) |zcu| {
        if (!use_llvm) {
            const index: File.Index = @enumFromInt(wasm.files.len);
            var zig_object: ZigObject = .{
                .index = index,
                .path = try std.fmt.allocPrint(gpa, "{s}.o", .{std.fs.path.stem(zcu.main_mod.root_src_path)}),
                .stack_pointer_sym = .null,
            };
            try zig_object.init(wasm);
            try wasm.files.append(gpa, .{ .zig_object = zig_object });
            wasm.zig_object_index = index;
        }
    }

    return wasm;
}

/// Returns the `File` union for `index`, or null when `index` is `.null`.
pub fn file(wasm: *const Wasm, index: File.Index) ?File {
    if (index == .null) return null;
    const tag = wasm.files.items(.tags)[@intFromEnum(index)];
    return switch (tag) {
        .zig_object => .{ .zig_object = &wasm.files.items(.data)[@intFromEnum(index)].zig_object },
        .object => .{ .object = &wasm.files.items(.data)[@intFromEnum(index)].object },
    };
}

/// Returns a pointer to the `ZigObject` when one was registered, null otherwise.
pub fn zigObjectPtr(wasm: *Wasm) ?*ZigObject {
    if (wasm.zig_object_index == .null) return null;
    return &wasm.files.items(.data)[@intFromEnum(wasm.zig_object_index)].zig_object;
}

/// Linear search through the output type section for an existing function
/// type equal to `func_type`. Returns its index, or null when absent.
pub fn getTypeIndex(wasm: *const Wasm, func_type: std.wasm.Type) ?u32 {
    var index: u32 = 0;
    while (index < wasm.func_types.items.len) : (index += 1) {
        if (wasm.func_types.items[index].eql(func_type)) return index;
    }
    return null;
}

/// Either creates a new import, or updates one if existing.
/// When `type_index` is non-null, we assume an external function.
/// In all other cases, a data-symbol will be created instead.
pub fn addOrUpdateImport(
    wasm: *Wasm,
    /// Name of the import
    name: []const u8,
    /// Symbol index that is external
    symbol_index: Symbol.Index,
    /// Optional library name (i.e. `extern "c" fn foo() void`
    lib_name: ?[:0]const u8,
    /// The index of the type that represents the function signature
    /// when the extern is a function. When this is null, a data-symbol
    /// is asserted instead.
    type_index: ?u32,
) !void {
    return wasm.zigObjectPtr().?.addOrUpdateImport(wasm, name, symbol_index, lib_name, type_index);
}

/// For a given name, creates a new global synthetic symbol.
/// Leaves index undefined and the default flags (0).
fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !SymbolLoc {
    const gpa = wasm.base.comp.gpa;
    // Intern the name first, then defer to the offset-based variant.
    return wasm.createSyntheticSymbolOffset(try wasm.string_table.put(gpa, name), tag);
}

fn createSyntheticSymbolOffset(wasm: *Wasm, name_offset: u32, tag: Symbol.Tag) !SymbolLoc {
    const gpa = wasm.base.comp.gpa;
    // Synthetic symbols always live in the `.null` file.
    const loc: SymbolLoc = .{
        .index = @enumFromInt(wasm.synthetic_symbols.items.len),
        .file = .null,
    };
    try wasm.synthetic_symbols.append(gpa, .{
        .name = name_offset,
        .flags = 0,
        .tag = tag,
        .index = undefined,
        .virtual_address = undefined,
    });
    try wasm.resolved_symbols.putNoClobber(gpa, loc, {});
    try wasm.globals.put(gpa, name_offset, loc);
    return loc;
}

fn parseInputFiles(wasm: *Wasm, files: []const []const u8) !void {
    for (files) |path| {
        // Try each supported container format in order; a path that is
        // neither an object file nor an archive only produces a warning.
        const recognized = (try wasm.parseObjectFile(path)) or
            (try wasm.parseArchive(path, false)); // load archives lazily
        if (!recognized) {
            log.warn("Unexpected file format at path: '{s}'", .{path});
        }
    }
}

/// Parses the object file from given path. Returns true when the given file was an object
/// file and parsed successfully. Returns false when file is not an object file.
/// May return an error instead when parsing failed.
fn parseObjectFile(wasm: *Wasm, path: []const u8) !bool {
    const gpa = wasm.base.comp.gpa;
    const fd = try fs.cwd().openFile(path, .{});
    errdefer fd.close();

    var parsed = Object.create(wasm, fd, path, null) catch |err| switch (err) {
        // Not a wasm object file; signal "unrecognized" to the caller.
        error.InvalidMagicByte, error.NotObjectFile => return false,
        else => |e| {
            var err_note = try wasm.addErrorWithNotes(1);
            try err_note.addMsg(wasm, "Failed parsing object file: {s}", .{@errorName(e)});
            try err_note.addNote(wasm, "while parsing '{s}'", .{path});
            return error.FlushFailure;
        },
    };
    errdefer parsed.deinit(gpa);

    parsed.index = @enumFromInt(wasm.files.len);
    try wasm.files.append(gpa, .{ .object = parsed });
    try wasm.objects.append(gpa, parsed.index);
    return true;
}

/// Creates a new empty `Atom` and returns its `Atom.Index`
pub fn createAtom(wasm: *Wasm, sym_index: Symbol.Index, file_index: File.Index) !Atom.Index {
    const gpa = wasm.base.comp.gpa;
    const new_index: Atom.Index = @enumFromInt(wasm.managed_atoms.items.len);
    const slot = try wasm.managed_atoms.addOne(gpa);
    slot.* = .{ .file = file_index, .sym_index = sym_index };
    try wasm.symbol_atom.putNoClobber(gpa, slot.symbolLoc(), new_index);
    return new_index;
}

/// Returns a copy of the atom at `index`.
pub inline fn getAtom(wasm: *const Wasm, index: Atom.Index) Atom {
    return wasm.managed_atoms.items[@intFromEnum(index)];
}

/// Returns a mutable pointer to the atom at `index`.
pub inline fn getAtomPtr(wasm: *Wasm, index: Atom.Index) *Atom {
    return &wasm.managed_atoms.items[@intFromEnum(index)];
}

/// Parses an archive file and will then parse each object file
/// that was found in the archive file.
/// Returns false when the file is not an archive file.
/// May return an error instead when parsing failed.
///
/// When `force_load` is `true`, it will force-load all object files in the archive.
/// When false, it will only link with object files that contain symbols that
/// are referenced by other object files or Zig code.
fn parseArchive(wasm: *Wasm, path: []const u8, force_load: bool) !bool {
    const gpa = wasm.base.comp.gpa;
    const archive_file = try fs.cwd().openFile(path, .{});
    errdefer archive_file.close();

    var archive: Archive = .{
        .file = archive_file,
        .name = path,
    };
    archive.parse(gpa) catch |err| switch (err) {
        error.EndOfStream, error.NotArchive => {
            // Not an archive at all; clean up and let the caller try other formats.
            archive.deinit(gpa);
            return false;
        },
        else => |e| {
            var err_note = try wasm.addErrorWithNotes(1);
            try err_note.addMsg(wasm, "Failed parsing archive: {s}", .{@errorName(e)});
            try err_note.addNote(wasm, "while parsing archive {s}", .{path});
            return error.FlushFailure;
        },
    };

    if (!force_load) {
        // Lazy mode: just register the archive; objects are pulled in later
        // by `resolveSymbolsInArchives` when they satisfy undefined symbols.
        errdefer archive.deinit(gpa);
        try wasm.archives.append(gpa, archive);
        return true;
    }
    defer archive.deinit(gpa);

    // In this case we must force link all embedded object files within the archive
    // We loop over all symbols, and then group them by offset as the offset
    // notates where the object file starts.
    var offsets = std.AutoArrayHashMap(u32, void).init(gpa);
    defer offsets.deinit();
    for (archive.toc.values()) |symbol_offsets| {
        for (symbol_offsets.items) |sym_offset| {
            try offsets.put(sym_offset, {});
        }
    }

    for (offsets.keys()) |file_offset| {
        var object = archive.parseObject(wasm, file_offset) catch |e| {
            var err_note = try wasm.addErrorWithNotes(1);
            try err_note.addMsg(wasm, "Failed parsing object: {s}", .{@errorName(e)});
            try err_note.addNote(wasm, "while parsing object in archive {s}", .{path});
            return error.FlushFailure;
        };
        object.index = @enumFromInt(wasm.files.len);
        try wasm.files.append(gpa, .{ .object = object });
        try wasm.objects.append(gpa, object.index);
    }

    return true;
}

/// Returns true when any registered GOT symbol is thread-local.
fn requiresTLSReloc(wasm: *const Wasm) bool {
    for (wasm.got_symbols.items) |loc| {
        if (loc.getSymbol(wasm).isTLS()) {
            return true;
        }
    }
    return false;
}

/// Resolves every global symbol of the object file at `file_index` against
/// the symbols already known to the linker, reporting collisions and type
/// mismatches, and recording discarded/undefined symbols as appropriate.
fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void {
    const gpa = wasm.base.comp.gpa;
    const obj_file = wasm.file(file_index).?;
    log.debug("Resolving symbols in object: '{s}'", .{obj_file.path()});

    for (obj_file.symbols(), 0..) |symbol, i| {
        const sym_index: Symbol.Index = @enumFromInt(i);
        const location: SymbolLoc = .{ .file = file_index, .index = sym_index };
        const sym_name = obj_file.string(symbol.name);
        // The indirect function table is always provided by the linker itself.
        if (mem.eql(u8, sym_name, "__indirect_function_table")) {
            continue;
        }
        const sym_name_index = try wasm.string_table.put(gpa, sym_name);

        if (symbol.isLocal()) {
            if (symbol.isUndefined()) {
                var err = try wasm.addErrorWithNotes(1);
                try err.addMsg(wasm, "Local symbols are not allowed to reference imports", .{});
                try err.addNote(wasm, "symbol '{s}' defined in '{s}'", .{ sym_name, obj_file.path() });
            }
            try wasm.resolved_symbols.putNoClobber(gpa, location, {});
            continue;
        }

        const maybe_existing = try wasm.globals.getOrPut(gpa, sym_name_index);
        if (!maybe_existing.found_existing) {
            // First time we see this global name: it wins by default.
            maybe_existing.value_ptr.* = location;
            try wasm.resolved_symbols.putNoClobber(gpa, location, {});

            if (symbol.isUndefined()) {
                try wasm.undefs.putNoClobber(gpa, sym_name_index, location);
            }
            continue;
        }

        const existing_loc = maybe_existing.value_ptr.*;
        const existing_sym: *Symbol = existing_loc.getSymbol(wasm);
        const existing_file = wasm.file(existing_loc.file);
        const existing_file_path = if (existing_file) |existing_obj_file|
            existing_obj_file.path()
        else
            wasm.name;

        if (!existing_sym.isUndefined()) outer: {
            if (!symbol.isUndefined()) inner: {
                if (symbol.isWeak()) {
                    break :inner; // ignore the new symbol (discard it)
                }
                if (existing_sym.isWeak()) {
                    break :outer; // existing is weak, while new one isn't. Replace it.
                }
                // both are defined and neither is weak: a genuine symbol collision.
                var err = try wasm.addErrorWithNotes(2);
                try err.addMsg(wasm, "symbol '{s}' defined multiple times", .{sym_name});
                try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path});
                try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()});
            }

            try wasm.discarded.put(gpa, location, existing_loc);
            continue; // Do not overwrite defined symbols with undefined symbols
        }

        if (symbol.tag != existing_sym.tag) {
            var err = try wasm.addErrorWithNotes(2);
            try err.addMsg(wasm, "symbol '{s}' mismatching types '{s}' and '{s}'", .{ sym_name, @tagName(symbol.tag), @tagName(existing_sym.tag) });
            try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path});
            try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()});
        }

        if (existing_sym.isUndefined() and symbol.isUndefined()) {
            // only verify module/import name for function symbols
            if (symbol.tag == .function) {
                const existing_name = if (existing_file) |existing_obj| blk: {
                    const imp = existing_obj.import(existing_loc.index);
                    break :blk existing_obj.string(imp.module_name);
                } else blk: {
                    const name_index = wasm.imports.get(existing_loc).?.module_name;
                    break :blk wasm.string_table.get(name_index);
                };

                const imp = obj_file.import(sym_index);
                const module_name = obj_file.string(imp.module_name);
                if (!mem.eql(u8, existing_name, module_name)) {
                    var err = try wasm.addErrorWithNotes(2);
                    try err.addMsg(wasm, "symbol '{s}' module name mismatch. Expected '{s}', but found '{s}'", .{
                        sym_name,
                        existing_name,
                        module_name,
                    });
                    try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path});
                    try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()});
                }
            }

            // both undefined so skip overwriting existing symbol and discard the new symbol
            try wasm.discarded.put(gpa, location, existing_loc);
            continue;
        }

        if (existing_sym.tag == .global) {
            const existing_ty = wasm.getGlobalType(existing_loc);
            const new_ty = wasm.getGlobalType(location);
            if (existing_ty.mutable != new_ty.mutable or existing_ty.valtype != new_ty.valtype) {
                var err = try wasm.addErrorWithNotes(2);
                try err.addMsg(wasm, "symbol '{s}' mismatching global types", .{sym_name});
                try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path});
                try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()});
            }
        }

        if (existing_sym.tag == .function) {
            const existing_ty = wasm.getFunctionSignature(existing_loc);
            const new_ty = wasm.getFunctionSignature(location);
            if (!existing_ty.eql(new_ty)) {
                var err = try wasm.addErrorWithNotes(3);
                try err.addMsg(wasm, "symbol '{s}' mismatching function signatures.", .{sym_name});
                try err.addNote(wasm, "expected signature {}, but found signature {}", .{ existing_ty, new_ty });
                try err.addNote(wasm, "first definition in '{s}'", .{existing_file_path});
                try err.addNote(wasm, "next definition in '{s}'", .{obj_file.path()});
            }
        }

        // when both symbols are weak, we skip overwriting unless the existing
        // symbol is undefined and the new one isn't, in which case we *do* overwrite it.
        if (existing_sym.isWeak() and symbol.isWeak()) blk: {
            if (existing_sym.isUndefined() and !symbol.isUndefined()) break :blk;
            try wasm.discarded.put(gpa, location, existing_loc);
            continue;
        }

        // simply overwrite with the new symbol
        log.debug("Overwriting symbol '{s}'", .{sym_name});
        log.debug(" old definition in '{s}'", .{existing_file_path});
        log.debug(" new definition in '{s}'", .{obj_file.path()});
        try wasm.discarded.putNoClobber(gpa, existing_loc, location);
        maybe_existing.value_ptr.* = location;
        try wasm.globals.put(gpa, sym_name_index, location);
        try wasm.resolved_symbols.put(gpa, location, {});
        assert(wasm.resolved_symbols.swapRemove(existing_loc));
        if (existing_sym.isUndefined()) {
            _ = wasm.undefs.swapRemove(sym_name_index);
        }
    }
}

/// Attempts to resolve any still-undefined symbols by lazily parsing
/// object files out of the registered archives.
fn resolveSymbolsInArchives(wasm: *Wasm) !void {
    const gpa = wasm.base.comp.gpa;
    if (wasm.archives.items.len == 0) return;

    log.debug("Resolving symbols in archives", .{});
    var index: u32 = 0;
    undef_loop: while (index < wasm.undefs.count()) {
        const sym_name_index = wasm.undefs.keys()[index];

        for (wasm.archives.items) |archive| {
            const sym_name = wasm.string_table.get(sym_name_index);
            // NOTE(review): this log fires for every archive, even those that do
            // not contain the symbol (the TOC lookup happens just below);
            // consider logging only after a successful lookup.
            log.debug("Detected symbol '{s}' in archive '{s}', parsing objects..", .{ sym_name, archive.name });
            const offset = archive.toc.get(sym_name) orelse {
                // symbol does not exist in this archive
                continue;
            };

            // Symbol is found in unparsed object file within current archive.
            // Parse object and resolve symbols again before we check remaining
            // undefined symbols.
var object = archive.parseObject(wasm, offset.items[0]) catch |e| { var err_note = try wasm.addErrorWithNotes(1); try err_note.addMsg(wasm, "Failed parsing object: {s}", .{@errorName(e)}); try err_note.addNote(wasm, "while parsing object in archive {s}", .{archive.name}); return error.FlushFailure; }; object.index = @enumFromInt(wasm.files.len); try wasm.files.append(gpa, .{ .object = object }); try wasm.objects.append(gpa, object.index); try wasm.resolveSymbolsInObject(object.index); // continue loop for any remaining undefined symbols that still exist // after resolving last object file continue :undef_loop; } index += 1; } } /// Writes an unsigned 32-bit integer as a LEB128-encoded 'i32.const' value. fn writeI32Const(writer: anytype, val: u32) !void { try writer.writeByte(std.wasm.opcode(.i32_const)); try leb.writeILEB128(writer, @as(i32, @bitCast(val))); } fn setupInitMemoryFunction(wasm: *Wasm) !void { const comp = wasm.base.comp; const gpa = comp.gpa; const shared_memory = comp.config.shared_memory; const import_memory = comp.config.import_memory; // Passive segments are used to avoid memory being reinitialized on each // thread's instantiation. These passive segments are initialized and // dropped in __wasm_init_memory, which is registered as the start function // We also initialize bss segments (using memory.fill) as part of this // function. if (!wasm.hasPassiveInitializationSegments()) { return; } const sym_loc = try wasm.createSyntheticSymbol("__wasm_init_memory", .function); sym_loc.getSymbol(wasm).mark(); const flag_address: u32 = if (shared_memory) address: { // when we have passive initialization segments and shared memory // `setupMemory` will create this symbol and set its virtual address. 
const loc = wasm.findGlobalSymbol("__wasm_init_memory_flag").?; break :address loc.getSymbol(wasm).virtual_address; } else 0; var function_body = std.ArrayList(u8).init(gpa); defer function_body.deinit(); const writer = function_body.writer(); // we have 0 locals try leb.writeULEB128(writer, @as(u32, 0)); if (shared_memory) { // destination blocks // based on values we jump to corresponding label try writer.writeByte(std.wasm.opcode(.block)); // $drop try writer.writeByte(std.wasm.block_empty); // block type try writer.writeByte(std.wasm.opcode(.block)); // $wait try writer.writeByte(std.wasm.block_empty); // block type try writer.writeByte(std.wasm.opcode(.block)); // $init try writer.writeByte(std.wasm.block_empty); // block type // atomically check try writeI32Const(writer, flag_address); try writeI32Const(writer, 0); try writeI32Const(writer, 1); try writer.writeByte(std.wasm.opcode(.atomics_prefix)); try leb.writeULEB128(writer, std.wasm.atomicsOpcode(.i32_atomic_rmw_cmpxchg)); try leb.writeULEB128(writer, @as(u32, 2)); // alignment try leb.writeULEB128(writer, @as(u32, 0)); // offset // based on the value from the atomic check, jump to the label. try writer.writeByte(std.wasm.opcode(.br_table)); try leb.writeULEB128(writer, @as(u32, 2)); // length of the table (we have 3 blocks but because of the mandatory default the length is 2). try leb.writeULEB128(writer, @as(u32, 0)); // $init try leb.writeULEB128(writer, @as(u32, 1)); // $wait try leb.writeULEB128(writer, @as(u32, 2)); // $drop try writer.writeByte(std.wasm.opcode(.end)); } var it = wasm.data_segments.iterator(); var segment_index: u32 = 0; while (it.next()) |entry| : (segment_index += 1) { const segment: Segment = wasm.segments.items[entry.value_ptr.*]; if (segment.needsPassiveInitialization(import_memory, entry.key_ptr.*)) { // For passive BSS segments we can simple issue a memory.fill(0). // For non-BSS segments we do a memory.init. 
Both these // instructions take as their first argument the destination // address. try writeI32Const(writer, segment.offset); if (shared_memory and std.mem.eql(u8, entry.key_ptr.*, ".tdata")) { // When we initialize the TLS segment we also set the `__tls_base` // global. This allows the runtime to use this static copy of the // TLS data for the first/main thread. try writeI32Const(writer, segment.offset); try writer.writeByte(std.wasm.opcode(.global_set)); const loc = wasm.findGlobalSymbol("__tls_base").?; try leb.writeULEB128(writer, loc.getSymbol(wasm).index); } try writeI32Const(writer, 0); try writeI32Const(writer, segment.size); try writer.writeByte(std.wasm.opcode(.misc_prefix)); if (std.mem.eql(u8, entry.key_ptr.*, ".bss")) { // fill bss segment with zeroes try leb.writeULEB128(writer, std.wasm.miscOpcode(.memory_fill)); } else { // initialize the segment try leb.writeULEB128(writer, std.wasm.miscOpcode(.memory_init)); try leb.writeULEB128(writer, segment_index); } try writer.writeByte(0); // memory index immediate } } if (shared_memory) { // we set the init memory flag to value '2' try writeI32Const(writer, flag_address); try writeI32Const(writer, 2); try writer.writeByte(std.wasm.opcode(.atomics_prefix)); try leb.writeULEB128(writer, std.wasm.atomicsOpcode(.i32_atomic_store)); try leb.writeULEB128(writer, @as(u32, 2)); // alignment try leb.writeULEB128(writer, @as(u32, 0)); // offset // notify any waiters for segment initialization completion try writeI32Const(writer, flag_address); try writer.writeByte(std.wasm.opcode(.i32_const)); try leb.writeILEB128(writer, @as(i32, -1)); // number of waiters try writer.writeByte(std.wasm.opcode(.atomics_prefix)); try leb.writeULEB128(writer, std.wasm.atomicsOpcode(.memory_atomic_notify)); try leb.writeULEB128(writer, @as(u32, 2)); // alignment try leb.writeULEB128(writer, @as(u32, 0)); // offset try writer.writeByte(std.wasm.opcode(.drop)); // branch and drop segments try writer.writeByte(std.wasm.opcode(.br)); try 
leb.writeULEB128(writer, @as(u32, 1)); // wait for thread to initialize memory segments try writer.writeByte(std.wasm.opcode(.end)); // end $wait try writeI32Const(writer, flag_address); try writeI32Const(writer, 1); // expected flag value try writer.writeByte(std.wasm.opcode(.i64_const)); try leb.writeILEB128(writer, @as(i64, -1)); // timeout try writer.writeByte(std.wasm.opcode(.atomics_prefix)); try leb.writeULEB128(writer, std.wasm.atomicsOpcode(.memory_atomic_wait32)); try leb.writeULEB128(writer, @as(u32, 2)); // alignment try leb.writeULEB128(writer, @as(u32, 0)); // offset try writer.writeByte(std.wasm.opcode(.drop)); try writer.writeByte(std.wasm.opcode(.end)); // end $drop } it.reset(); segment_index = 0; while (it.next()) |entry| : (segment_index += 1) { const name = entry.key_ptr.*; const segment: Segment = wasm.segments.items[entry.value_ptr.*]; if (segment.needsPassiveInitialization(import_memory, name) and !std.mem.eql(u8, name, ".bss")) { // The TLS region should not be dropped since its is needed // during the initialization of each thread (__wasm_init_tls). if (shared_memory and std.mem.eql(u8, name, ".tdata")) { continue; } try writer.writeByte(std.wasm.opcode(.misc_prefix)); try leb.writeULEB128(writer, std.wasm.miscOpcode(.data_drop)); try leb.writeULEB128(writer, segment_index); } } // End of the function body try writer.writeByte(std.wasm.opcode(.end)); try wasm.createSyntheticFunction( "__wasm_init_memory", std.wasm.Type{ .params = &.{}, .returns = &.{} }, &function_body, ); } /// Constructs a synthetic function that performs runtime relocations for /// TLS symbols. This function is called by `__wasm_init_tls`. fn setupTLSRelocationsFunction(wasm: *Wasm) !void { const comp = wasm.base.comp; const gpa = comp.gpa; const shared_memory = comp.config.shared_memory; // When we have TLS GOT entries and shared memory is enabled, // we must perform runtime relocations or else we don't create the function. 
    if (!shared_memory or !wasm.requiresTLSReloc()) {
        return;
    }

    const loc = try wasm.createSyntheticSymbol("__wasm_apply_global_tls_relocs", .function);
    loc.getSymbol(wasm).mark();
    var function_body = std.ArrayList(u8).init(gpa);
    defer function_body.deinit();
    const writer = function_body.writer();

    // locals (we have none)
    try writer.writeByte(0);
    // For each TLS GOT entry, emit: global = __tls_base + symbol's virtual address.
    for (wasm.got_symbols.items, 0..) |got_loc, got_index| {
        const sym: *Symbol = got_loc.getSymbol(wasm);
        if (!sym.isTLS()) continue; // only relocate TLS symbols
        if (sym.tag == .data and sym.isDefined()) {
            // get __tls_base
            try writer.writeByte(std.wasm.opcode(.global_get));
            try leb.writeULEB128(writer, wasm.findGlobalSymbol("__tls_base").?.getSymbol(wasm).index);
            // add the virtual address of the symbol
            try writer.writeByte(std.wasm.opcode(.i32_const));
            try leb.writeULEB128(writer, sym.virtual_address);
        } else if (sym.tag == .function) {
            @panic("TODO: relocate GOT entry of function");
        } else continue;

        try writer.writeByte(std.wasm.opcode(.i32_add));
        try writer.writeByte(std.wasm.opcode(.global_set));
        // GOT globals live after the imported globals and the regular output globals.
        try leb.writeULEB128(writer, wasm.imported_globals_count + @as(u32, @intCast(wasm.wasm_globals.items.len + got_index)));
    }
    try writer.writeByte(std.wasm.opcode(.end));

    try wasm.createSyntheticFunction(
        "__wasm_apply_global_tls_relocs",
        std.wasm.Type{ .params = &.{}, .returns = &.{} },
        &function_body,
    );
}

/// Validates the wasm feature sets of all linked object files against each
/// other and against the target CPU features (or infers them when the user
/// gave none). On success, writes the final set of features to emit into
/// `to_emit`. Returns `error.FlushFailure` when the combined feature set is
/// inconsistent.
/// Each entry of the `used`/`disallowed`/`required` arrays packs the file
/// index shifted left by one with bit 0 acting as the "is set" flag.
fn validateFeatures(
    wasm: *const Wasm,
    to_emit: *[@typeInfo(types.Feature.Tag).Enum.fields.len]bool,
    emit_features_count: *u32,
) !void {
    const comp = wasm.base.comp;
    const target = comp.root_mod.resolved_target.result;
    const shared_memory = comp.config.shared_memory;
    const cpu_features = target.cpu.features;
    const infer = cpu_features.isEmpty(); // when the user did not define any features, we infer them from linked objects.
    const known_features_count = @typeInfo(types.Feature.Tag).Enum.fields.len;

    var allowed = [_]bool{false} ** known_features_count;
    var used = [_]u17{0} ** known_features_count;
    var disallowed = [_]u17{0} ** known_features_count;
    var required = [_]u17{0} ** known_features_count;

    // when false, we fail linking. We only verify this after a loop to catch all invalid features.
    var valid_feature_set = true;
    // will be set to true when there's any TLS segment found in any of the object files
    var has_tls = false;

    // When the user has given an explicit list of features to enable,
    // we extract them and insert each into the 'allowed' list.
    if (!infer) {
        inline for (@typeInfo(std.Target.wasm.Feature).Enum.fields) |feature_field| {
            if (cpu_features.isEnabled(feature_field.value)) {
                allowed[feature_field.value] = true;
                emit_features_count.* += 1;
            }
        }
    }

    // extract all the used, disallowed and required features from each
    // linked object file so we can test them.
    for (wasm.objects.items) |file_index| {
        const object: Object = wasm.files.items(.data)[@intFromEnum(file_index)].object;
        for (object.features) |feature| {
            // file index in the upper bits, "set" flag in bit 0
            const value = @as(u16, @intFromEnum(file_index)) << 1 | @as(u1, 1);
            switch (feature.prefix) {
                .used => {
                    used[@intFromEnum(feature.tag)] = value;
                },
                .disallowed => {
                    disallowed[@intFromEnum(feature.tag)] = value;
                },
                .required => {
                    required[@intFromEnum(feature.tag)] = value;
                    used[@intFromEnum(feature.tag)] = value;
                },
            }
        }
        for (object.segment_info) |segment| {
            if (segment.isTLS()) {
                has_tls = true;
            }
        }
    }

    // when we infer the features, we allow each feature found in the 'used' set
    // and insert it into the 'allowed' set. When features are not inferred,
    // we validate that a used feature is allowed.
    for (used, 0..) |used_set, used_index| {
        const is_enabled = @as(u1, @truncate(used_set)) != 0;
        if (infer) {
            allowed[used_index] = is_enabled;
            emit_features_count.* += @intFromBool(is_enabled);
        } else if (is_enabled and !allowed[used_index]) {
            var err = try wasm.addErrorWithNotes(1);
            try err.addMsg(wasm, "feature '{}' not allowed, but used by linked object", .{@as(types.Feature.Tag, @enumFromInt(used_index))});
            try err.addNote(wasm, "defined in '{s}'", .{wasm.files.items(.data)[used_set >> 1].object.path});
            valid_feature_set = false;
        }
    }

    if (!valid_feature_set) {
        return error.FlushFailure;
    }

    if (shared_memory) {
        const disallowed_feature = disallowed[@intFromEnum(types.Feature.Tag.shared_mem)];
        if (@as(u1, @truncate(disallowed_feature)) != 0) {
            try wasm.addErrorWithoutNotes(
                "shared-memory is disallowed by '{s}' because it wasn't compiled with 'atomics' and 'bulk-memory' features enabled",
                .{wasm.files.items(.data)[disallowed_feature >> 1].object.path},
            );
            valid_feature_set = false;
        }

        // shared memory requires both atomics and bulk-memory
        for ([_]types.Feature.Tag{ .atomics, .bulk_memory }) |feature| {
            if (!allowed[@intFromEnum(feature)]) {
                try wasm.addErrorWithoutNotes("feature '{}' is not used but is required for shared-memory", .{feature});
            }
        }
    }

    if (has_tls) {
        // TLS also requires atomics and bulk-memory
        for ([_]types.Feature.Tag{ .atomics, .bulk_memory }) |feature| {
            if (!allowed[@intFromEnum(feature)]) {
                try wasm.addErrorWithoutNotes("feature '{}' is not used but is required for thread-local storage", .{feature});
            }
        }
    }
    // For each linked object, validate the required and disallowed features
    for (wasm.objects.items) |file_index| {
        var object_used_features = [_]bool{false} ** known_features_count;
        const object = wasm.files.items(.data)[@intFromEnum(file_index)].object;
        for (object.features) |feature| {
            if (feature.prefix == .disallowed) continue; // already defined in 'disallowed' set.
            // from here a feature is always used
            const disallowed_feature = disallowed[@intFromEnum(feature.tag)];
            if (@as(u1, @truncate(disallowed_feature)) != 0) {
                var err = try wasm.addErrorWithNotes(2);
                try err.addMsg(wasm, "feature '{}' is disallowed, but used by linked object", .{feature.tag});
                try err.addNote(wasm, "disallowed by '{s}'", .{wasm.files.items(.data)[disallowed_feature >> 1].object.path});
                try err.addNote(wasm, "used in '{s}'", .{object.path});
                valid_feature_set = false;
            }

            object_used_features[@intFromEnum(feature.tag)] = true;
        }

        // validate the linked object file has each required feature
        for (required, 0..) |required_feature, feature_index| {
            const is_required = @as(u1, @truncate(required_feature)) != 0;
            if (is_required and !object_used_features[feature_index]) {
                var err = try wasm.addErrorWithNotes(2);
                try err.addMsg(wasm, "feature '{}' is required but not used in linked object", .{@as(types.Feature.Tag, @enumFromInt(feature_index))});
                try err.addNote(wasm, "required by '{s}'", .{wasm.files.items(.data)[required_feature >> 1].object.path});
                try err.addNote(wasm, "missing in '{s}'", .{object.path});
                valid_feature_set = false;
            }
        }
    }

    if (!valid_feature_set) {
        return error.FlushFailure;
    }

    to_emit.* = allowed;
}

/// Creates synthetic linker-symbols, but only if they are being referenced from
/// any object file. For instance, the `__heap_base` symbol will only be created,
/// if one or multiple undefined references exist. When none exist, the symbol will
/// not be created, ensuring we don't unnecessarily emit unreferenced symbols.
fn resolveLazySymbols(wasm: *Wasm) !void { const comp = wasm.base.comp; const gpa = comp.gpa; const shared_memory = comp.config.shared_memory; if (wasm.string_table.getOffset("__heap_base")) |name_offset| { if (wasm.undefs.fetchSwapRemove(name_offset)) |kv| { const loc = try wasm.createSyntheticSymbolOffset(name_offset, .data); try wasm.discarded.putNoClobber(gpa, kv.value, loc); _ = wasm.resolved_symbols.swapRemove(loc); // we don't want to emit this symbol, only use it for relocations. } } if (wasm.string_table.getOffset("__heap_end")) |name_offset| { if (wasm.undefs.fetchSwapRemove(name_offset)) |kv| { const loc = try wasm.createSyntheticSymbolOffset(name_offset, .data); try wasm.discarded.putNoClobber(gpa, kv.value, loc); _ = wasm.resolved_symbols.swapRemove(loc); } } if (!shared_memory) { if (wasm.string_table.getOffset("__tls_base")) |name_offset| { if (wasm.undefs.fetchSwapRemove(name_offset)) |kv| { const loc = try wasm.createSyntheticSymbolOffset(name_offset, .global); try wasm.discarded.putNoClobber(gpa, kv.value, loc); _ = wasm.resolved_symbols.swapRemove(kv.value); const symbol = loc.getSymbol(wasm); symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); symbol.index = @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len); try wasm.wasm_globals.append(gpa, .{ .global_type = .{ .valtype = .i32, .mutable = true }, .init = .{ .i32_const = undefined }, }); } } } } // Tries to find a global symbol by its name. Returns null when not found, /// and its location when it is found. 
pub fn findGlobalSymbol(wasm: *Wasm, name: []const u8) ?SymbolLoc {
    const offset = wasm.string_table.getOffset(name) orelse return null;
    return wasm.globals.get(offset);
}

/// Reports an error for every symbol that is still undefined after resolution.
/// Skipped entirely for object-file output and when undefined imports are
/// explicitly allowed. Returns `error.FlushFailure` when any were found.
fn checkUndefinedSymbols(wasm: *const Wasm) !void {
    const comp = wasm.base.comp;
    if (comp.config.output_mode == .Obj) return;
    if (wasm.import_symbols) return;

    var found_undefined_symbols = false;
    for (wasm.undefs.values()) |undef| {
        const symbol = undef.getSymbol(wasm);
        // Only data symbols are fatal here; undefined functions/globals can
        // still be satisfied as imports from the host.
        if (symbol.tag == .data) {
            found_undefined_symbols = true;
            const file_name = if (wasm.file(undef.file)) |obj_file| obj_file.path() else wasm.name;
            const symbol_name = undef.getName(wasm);
            var err = try wasm.addErrorWithNotes(1);
            try err.addMsg(wasm, "could not resolve undefined symbol '{s}'", .{symbol_name});
            try err.addNote(wasm, "defined in '{s}'", .{file_name});
        }
    }
    if (found_undefined_symbols) {
        return error.FlushFailure;
    }
}

/// Releases all resources owned by the linker: per-object state, archives,
/// synthetic symbols, atoms, segments and the output sections.
pub fn deinit(wasm: *Wasm) void {
    const gpa = wasm.base.comp.gpa;
    if (wasm.llvm_object) |llvm_object| llvm_object.deinit();

    for (wasm.func_types.items) |*func_type| {
        func_type.deinit(gpa);
    }
    for (wasm.segment_info.values()) |segment_info| {
        gpa.free(segment_info.name);
    }
    if (wasm.zigObjectPtr()) |zig_obj| {
        zig_obj.deinit(wasm);
    }
    for (wasm.objects.items) |obj_index| {
        wasm.file(obj_index).?.object.deinit(gpa);
    }
    for (wasm.archives.items) |*archive| {
        archive.deinit(gpa);
    }

    // The `__wasm_init_tls` atom is linker-owned and must be freed explicitly.
    if (wasm.findGlobalSymbol("__wasm_init_tls")) |loc| {
        const atom = wasm.symbol_atom.get(loc).?;
        wasm.getAtomPtr(atom).deinit(gpa);
    }

    wasm.synthetic_symbols.deinit(gpa);
    wasm.globals.deinit(gpa);
    wasm.resolved_symbols.deinit(gpa);
    wasm.undefs.deinit(gpa);
    wasm.discarded.deinit(gpa);
    wasm.symbol_atom.deinit(gpa);
    wasm.atoms.deinit(gpa);
    wasm.managed_atoms.deinit(gpa);
    wasm.segments.deinit(gpa);
    wasm.data_segments.deinit(gpa);
    wasm.segment_info.deinit(gpa);
    wasm.objects.deinit(gpa);
    wasm.archives.deinit(gpa);

    // free output sections
    wasm.imports.deinit(gpa);
    wasm.func_types.deinit(gpa);
    wasm.functions.deinit(gpa);
    wasm.wasm_globals.deinit(gpa);
    wasm.function_table.deinit(gpa);
    wasm.tables.deinit(gpa);
    wasm.init_funcs.deinit(gpa);
    wasm.exports.deinit(gpa);
    wasm.string_table.deinit(gpa);
    wasm.files.deinit(gpa);
}

/// Generates code for a function, delegating to the LLVM object when the LLVM
/// backend is active, otherwise to the native wasm `ZigObject`.
pub fn updateFunc(wasm: *Wasm, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void {
    if (build_options.skip_non_native and builtin.object_format != .wasm) {
        @panic("Attempted to compile for object format that was disabled by build configuration");
    }
    if (wasm.llvm_object) |llvm_object| return llvm_object.updateFunc(mod, func_index, air, liveness);
    try wasm.zigObjectPtr().?.updateFunc(wasm, mod, func_index, air, liveness);
}

/// Generate code for the Decl, storing it in memory to be later written to
/// the file on flush().
pub fn updateDecl(wasm: *Wasm, mod: *Module, decl_index: InternPool.DeclIndex) !void {
    if (build_options.skip_non_native and builtin.object_format != .wasm) {
        @panic("Attempted to compile for object format that was disabled by build configuration");
    }
    if (wasm.llvm_object) |llvm_object| return llvm_object.updateDecl(mod, decl_index);
    try wasm.zigObjectPtr().?.updateDecl(wasm, mod, decl_index);
}

/// Updates debug-info line information for a decl. No-op for the LLVM backend.
pub fn updateDeclLineNumber(wasm: *Wasm, mod: *Module, decl_index: InternPool.DeclIndex) !void {
    if (wasm.llvm_object) |_| return;
    try wasm.zigObjectPtr().?.updateDeclLineNumber(mod, decl_index);
}

/// From a given symbol location, returns its `wasm.GlobalType`.
/// Asserts the Symbol represents a global.
fn getGlobalType(wasm: *const Wasm, loc: SymbolLoc) std.wasm.GlobalType { const symbol = loc.getSymbol(wasm); assert(symbol.tag == .global); const is_undefined = symbol.isUndefined(); if (wasm.file(loc.file)) |obj_file| { if (is_undefined) { return obj_file.import(loc.index).kind.global; } return obj_file.globals()[symbol.index - obj_file.importedGlobals()].global_type; } if (is_undefined) { return wasm.imports.get(loc).?.kind.global; } return wasm.wasm_globals.items[symbol.index].global_type; } /// From a given symbol location, returns its `wasm.Type`. /// Asserts the Symbol represents a function. fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type { const symbol = loc.getSymbol(wasm); assert(symbol.tag == .function); const is_undefined = symbol.isUndefined(); if (wasm.file(loc.file)) |obj_file| { if (is_undefined) { const ty_index = obj_file.import(loc.index).kind.function; return obj_file.funcTypes()[ty_index]; } const type_index = obj_file.function(loc.index).type_index; return obj_file.funcTypes()[type_index]; } if (is_undefined) { const ty_index = wasm.imports.get(loc).?.kind.function; return wasm.func_types.items[ty_index]; } return wasm.func_types.items[wasm.functions.get(.{ .file = loc.file, .index = symbol.index }).?.func.type_index]; } /// Lowers a constant typed value to a local symbol and atom. /// Returns the symbol index of the local /// The given `decl` is the parent decl whom owns the constant. pub fn lowerUnnamedConst(wasm: *Wasm, tv: TypedValue, decl_index: InternPool.DeclIndex) !u32 { return wasm.zigObjectPtr().?.lowerUnnamedConst(wasm, tv, decl_index); } /// Returns the symbol index from a symbol of which its flag is set global, /// such as an exported or imported symbol. /// If the symbol does not yet exist, creates a new one symbol instead /// and then returns the index to it. 
pub fn getGlobalSymbol(wasm: *Wasm, name: []const u8, lib_name: ?[]const u8) !Symbol.Index { _ = lib_name; return wasm.zigObjectPtr().?.getGlobalSymbol(wasm.base.comp.gpa, name); } /// For a given decl, find the given symbol index's atom, and create a relocation for the type. /// Returns the given pointer address pub fn getDeclVAddr( wasm: *Wasm, decl_index: InternPool.DeclIndex, reloc_info: link.File.RelocInfo, ) !u64 { return wasm.zigObjectPtr().?.getDeclVAddr(wasm, decl_index, reloc_info); } pub fn lowerAnonDecl( wasm: *Wasm, decl_val: InternPool.Index, explicit_alignment: Alignment, src_loc: Module.SrcLoc, ) !codegen.Result { return wasm.zigObjectPtr().?.lowerAnonDecl(wasm, decl_val, explicit_alignment, src_loc); } pub fn getAnonDeclVAddr(wasm: *Wasm, decl_val: InternPool.Index, reloc_info: link.File.RelocInfo) !u64 { return wasm.zigObjectPtr().?.getAnonDeclVAddr(wasm, decl_val, reloc_info); } pub fn deleteDeclExport( wasm: *Wasm, decl_index: InternPool.DeclIndex, name: InternPool.NullTerminatedString, ) void { if (wasm.llvm_object) |_| return; return wasm.zigObjectPtr().?.deleteDeclExport(wasm, decl_index, name); } pub fn updateExports( wasm: *Wasm, mod: *Module, exported: Module.Exported, exports: []const *Module.Export, ) !void { if (build_options.skip_non_native and builtin.object_format != .wasm) { @panic("Attempted to compile for object format that was disabled by build configuration"); } if (wasm.llvm_object) |llvm_object| return llvm_object.updateExports(mod, exported, exports); return wasm.zigObjectPtr().?.updateExports(wasm, mod, exported, exports); } pub fn freeDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) void { if (wasm.llvm_object) |llvm_object| return llvm_object.freeDecl(decl_index); return wasm.zigObjectPtr().?.freeDecl(wasm, decl_index); } /// Assigns indexes to all indirect functions. 
/// Starts at offset 1, where the value `0` represents an unresolved function pointer
/// or null-pointer
fn mapFunctionTable(wasm: *Wasm) void {
    var it = wasm.function_table.iterator();
    var index: u32 = 1;
    while (it.next()) |entry| {
        const symbol = entry.key_ptr.*.getSymbol(wasm);
        if (symbol.isAlive()) {
            entry.value_ptr.* = index;
            index += 1;
        } else {
            // NOTE(review): removing entries while iterating the map — confirm
            // this is safe for the map type backing `function_table`.
            wasm.function_table.removeByPtr(entry.key_ptr);
        }
    }

    if (wasm.import_table or wasm.base.comp.config.output_mode == .Obj) {
        // Imported table: only record the minimum size on the import.
        const sym_loc = wasm.findGlobalSymbol("__indirect_function_table").?;
        const import = wasm.imports.getPtr(sym_loc).?;
        import.kind.table.limits.min = index - 1; // we start at index 1.
    } else if (index > 1) {
        log.debug("Appending indirect function table", .{});
        const sym_loc = wasm.findGlobalSymbol("__indirect_function_table").?;
        const symbol = sym_loc.getSymbol(wasm);
        const table = &wasm.tables.items[symbol.index - wasm.imported_tables_count];
        // flags 0x1 means both min and max are present.
        table.limits = .{ .min = index, .max = index, .flags = 0x1 };
    }
}

/// From a given index, append the given `Atom` at the back of the linked list.
/// Simply inserts it into the map of atoms when it doesn't exist yet.
pub fn appendAtomAtIndex(wasm: *Wasm, index: u32, atom_index: Atom.Index) !void {
    const gpa = wasm.base.comp.gpa;
    const atom = wasm.getAtomPtr(atom_index);
    if (wasm.atoms.getPtr(index)) |last_index_ptr| {
        // chain the new atom in front; `prev` links toward older atoms
        atom.prev = last_index_ptr.*;
        last_index_ptr.* = atom_index;
    } else {
        try wasm.atoms.putNoClobber(gpa, index, atom_index);
    }
}

/// Walks every segment's atom list, pruning atoms whose symbol is dead and
/// assigning each live atom an aligned offset within its segment. Finally
/// sets each segment's total (aligned) size.
fn allocateAtoms(wasm: *Wasm) !void {
    // first sort the data segments
    try sortDataSegments(wasm);

    var it = wasm.atoms.iterator();
    while (it.next()) |entry| {
        const segment = &wasm.segments.items[entry.key_ptr.*];
        var atom_index = entry.value_ptr.*;
        if (entry.key_ptr.* == wasm.code_section_index) {
            // Code section is allocated upon writing as they are required to be ordered
            // to synchronise with the function section.
            continue;
        }
        var offset: u32 = 0;
        while (true) {
            const atom = wasm.getAtomPtr(atom_index);
            const symbol_loc = atom.symbolLoc();
            // Ensure we get the original symbol, so we verify the correct symbol on whether
            // it is dead or not and ensure an atom is removed when dead.
            // This is required as we may have parsed aliases into atoms.
            const sym = if (wasm.file(symbol_loc.file)) |obj_file|
                obj_file.symbol(symbol_loc.index).*
            else
                wasm.synthetic_symbols.items[@intFromEnum(symbol_loc.index)];

            // Dead symbols must be unlinked from the linked-list to prevent them
            // from being emit into the binary.
            if (sym.isDead()) {
                if (entry.value_ptr.* == atom_index and atom.prev != .null) {
                    // When the atom is dead and is also the first atom retrieved from wasm.atoms(index) we update
                    // the entry to point it to the previous atom to ensure we do not start with a dead symbol that
                    // was removed and therefore do not emit any code at all.
                    entry.value_ptr.* = atom.prev;
                }
                if (atom.prev == .null) break;
                atom_index = atom.prev;
                atom.prev = .null;
                continue;
            }
            // Align the running offset for this atom, then advance past it.
            offset = @intCast(atom.alignment.forward(offset));
            atom.offset = offset;
            log.debug("Atom '{s}' allocated from 0x{x:0>8} to 0x{x:0>8} size={d}", .{
                symbol_loc.getName(wasm),
                offset,
                offset + atom.size,
                atom.size,
            });
            offset += atom.size;
            if (atom.prev == .null) break;
            atom_index = atom.prev;
        }
        segment.size = @intCast(segment.alignment.forward(offset));
    }
}

/// For each data symbol, sets the virtual address.
fn allocateVirtualAddresses(wasm: *Wasm) void {
    for (wasm.resolved_symbols.keys()) |loc| {
        const symbol = loc.getSymbol(wasm);
        if (symbol.tag != .data or symbol.isDead()) {
            // Only data symbols have virtual addresses.
            // Dead symbols do not get allocated, so we don't need to set their virtual address either.
            continue;
        }
        const atom_index = wasm.symbol_atom.get(loc) orelse {
            // synthetic symbol that does not contain an atom
            continue;
        };

        const atom = wasm.getAtom(atom_index);
        const merge_segment = wasm.base.comp.config.output_mode != .Obj;
        const segment_info = if (atom.file != .null)
            wasm.file(atom.file).?.segmentInfo()
        else
            wasm.segment_info.values();
        const segment_name = segment_info[symbol.index].outputName(merge_segment);
        const segment_index = wasm.data_segments.get(segment_name).?;
        const segment = wasm.segments.items[segment_index];

        // TLS symbols have their virtual address set relative to their own TLS segment,
        // rather than the entire Data section.
        if (symbol.hasFlag(.WASM_SYM_TLS)) {
            symbol.virtual_address = atom.offset;
        } else {
            symbol.virtual_address = atom.offset + segment.offset;
        }
    }
}

/// Reorders `data_segments` so that `.rodata` segments come first, followed by
/// `.data`, then `.text`, with everything else last.
fn sortDataSegments(wasm: *Wasm) !void {
    const gpa = wasm.base.comp.gpa;
    var new_mapping: std.StringArrayHashMapUnmanaged(u32) = .{};
    try new_mapping.ensureUnusedCapacity(gpa, wasm.data_segments.count());
    errdefer new_mapping.deinit(gpa);

    const keys = try gpa.dupe([]const u8, wasm.data_segments.keys());
    defer gpa.free(keys);

    const SortContext = struct {
        fn sort(_: void, lhs: []const u8, rhs: []const u8) bool {
            return order(lhs) < order(rhs);
        }
        // lower rank sorts earlier
        fn order(name: []const u8) u8 {
            if (mem.startsWith(u8, name, ".rodata")) return 0;
            if (mem.startsWith(u8, name, ".data")) return 1;
            if (mem.startsWith(u8, name, ".text")) return 2;
            return 3;
        }
    };

    mem.sort([]const u8, keys, {}, SortContext.sort);
    for (keys) |key| {
        const segment_index = wasm.data_segments.get(key).?;
        new_mapping.putAssumeCapacity(key, segment_index);
    }
    wasm.data_segments.deinit(gpa);
    wasm.data_segments = new_mapping;
}

/// Obtains all initfuncs from each object file, verifies its function signature,
/// and then appends it to our final `init_funcs` list.
/// After all functions have been inserted, the functions will be ordered based
/// on their priority.
/// NOTE: This function must be called before we merged any other section. /// This is because all init funcs in the object files contain references to the /// original functions and their types. We need to know the type to verify it doesn't /// contain any parameters. fn setupInitFunctions(wasm: *Wasm) !void { const gpa = wasm.base.comp.gpa; // There's no constructors for Zig so we can simply search through linked object files only. for (wasm.objects.items) |file_index| { const object: Object = wasm.files.items(.data)[@intFromEnum(file_index)].object; try wasm.init_funcs.ensureUnusedCapacity(gpa, object.init_funcs.len); for (object.init_funcs) |init_func| { const symbol = object.symtable[init_func.symbol_index]; const ty: std.wasm.Type = if (symbol.isUndefined()) ty: { const imp: types.Import = object.findImport(symbol); break :ty object.func_types[imp.kind.function]; } else ty: { const func_index = symbol.index - object.imported_functions_count; const func = object.functions[func_index]; break :ty object.func_types[func.type_index]; }; if (ty.params.len != 0) { try wasm.addErrorWithoutNotes("constructor functions cannot take arguments: '{s}'", .{object.string_table.get(symbol.name)}); } log.debug("appended init func '{s}'\n", .{object.string_table.get(symbol.name)}); wasm.init_funcs.appendAssumeCapacity(.{ .index = @enumFromInt(init_func.symbol_index), .file = file_index, .priority = init_func.priority, }); try wasm.mark(.{ .index = @enumFromInt(init_func.symbol_index), .file = file_index }); } } // sort the initfunctions based on their priority mem.sort(InitFuncLoc, wasm.init_funcs.items, {}, InitFuncLoc.lessThan); if (wasm.init_funcs.items.len > 0) { const loc = wasm.findGlobalSymbol("__wasm_call_ctors").?; try wasm.mark(loc); } } /// Creates a function body for the `__wasm_call_ctors` symbol. /// Loops over all constructors found in `init_funcs` and calls them /// respectively based on their priority which was sorted by `setupInitFunctions`. 
/// NOTE: This function must be called after we merged all sections to ensure the
/// references to the function stored in the symbol have been finalized so we end
/// up calling the resolved function.
fn initializeCallCtorsFunction(wasm: *Wasm) !void {
    const gpa = wasm.base.comp.gpa;
    // No code to emit, so also no ctors to call
    if (wasm.code_section_index == null) {
        // Make sure to remove it from the resolved symbols so we do not emit
        // it within any section. TODO: Remove this once we implement garbage collection.
        const loc = wasm.findGlobalSymbol("__wasm_call_ctors").?;
        assert(wasm.resolved_symbols.swapRemove(loc));
        return;
    }

    var function_body = std.ArrayList(u8).init(gpa);
    defer function_body.deinit();
    const writer = function_body.writer();

    // Create the function body
    {
        // Write locals count (we have none)
        try leb.writeULEB128(writer, @as(u32, 0));

        // call constructors
        for (wasm.init_funcs.items) |init_func_loc| {
            const symbol = init_func_loc.getSymbol(wasm);
            const func = wasm.functions.values()[symbol.index - wasm.imported_functions_count].func;
            const ty = wasm.func_types.items[func.type_index];

            // Call function by its function index
            try writer.writeByte(std.wasm.opcode(.call));
            try leb.writeULEB128(writer, symbol.index);

            // drop all returned values from the stack as __wasm_call_ctors has no return value
            for (ty.returns) |_| {
                try writer.writeByte(std.wasm.opcode(.drop));
            }
        }

        // End function body
        try writer.writeByte(std.wasm.opcode(.end));
    }

    try wasm.createSyntheticFunction(
        "__wasm_call_ctors",
        std.wasm.Type{ .params = &.{}, .returns = &.{} },
        &function_body,
    );
}

/// Registers a previously created synthetic symbol as an actual function in the
/// output: assigns it a function index, takes ownership of `function_body` as a
/// new atom, and appends that atom to the code section. Does nothing when the
/// symbol was garbage-collected.
fn createSyntheticFunction(
    wasm: *Wasm,
    symbol_name: []const u8,
    func_ty: std.wasm.Type,
    function_body: *std.ArrayList(u8),
) !void {
    const gpa = wasm.base.comp.gpa;
    const loc = wasm.findGlobalSymbol(symbol_name).?; // forgot to create symbol?
    const symbol = loc.getSymbol(wasm);
    if (symbol.isDead()) {
        return;
    }
    const ty_index = try wasm.putOrGetFuncType(func_ty);
    // create function with above type
    const func_index = wasm.imported_functions_count + @as(u32, @intCast(wasm.functions.count()));
    try wasm.functions.putNoClobber(
        gpa,
        .{ .file = .null, .index = func_index },
        .{ .func = .{ .type_index = ty_index }, .sym_index = loc.index },
    );
    symbol.index = func_index;

    // create the atom that will be output into the final binary
    const atom_index = try wasm.createAtom(loc.index, .null);
    const atom = wasm.getAtomPtr(atom_index);
    atom.size = @intCast(function_body.items.len);
    // ownership of the body's buffer moves into the atom
    atom.code = function_body.moveToUnmanaged();
    try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom_index);
}

/// Unlike `createSyntheticFunction` this function is to be called by
/// the codegeneration backend. This will not allocate the created Atom yet.
/// Returns the index of the symbol.
pub fn createFunction(
    wasm: *Wasm,
    symbol_name: []const u8,
    func_ty: std.wasm.Type,
    function_body: *std.ArrayList(u8),
    relocations: *std.ArrayList(Relocation),
) !Symbol.Index {
    return wasm.zigObjectPtr().?.createFunction(wasm, symbol_name, func_ty, function_body, relocations);
}

/// If required, sets the function index in the `start` section.
fn setupStartSection(wasm: *Wasm) !void {
    // When shared memory requires runtime initialization, the `start` section
    // must run `__wasm_init_memory` before any other code executes.
    if (wasm.findGlobalSymbol("__wasm_init_memory")) |loc| {
        wasm.entry = loc.getSymbol(wasm).index;
    }
}

/// Synthesizes the `__wasm_init_tls` function body, which initializes a thread's
/// TLS block (parameter 0: the TLS block's address) using bulk-memory `memory.init`
/// and applies any runtime TLS relocations. Only emitted for shared memory.
fn initializeTLSFunction(wasm: *Wasm) !void {
    const comp = wasm.base.comp;
    const gpa = comp.gpa;
    const shared_memory = comp.config.shared_memory;
    if (!shared_memory) return;

    // ensure function is marked as we must emit it
    wasm.findGlobalSymbol("__wasm_init_tls").?.getSymbol(wasm).mark();

    var function_body = std.ArrayList(u8).init(gpa);
    defer function_body.deinit();
    const writer = function_body.writer();

    // locals
    try writer.writeByte(0);

    // If there's a TLS segment, initialize it during runtime using the bulk-memory feature
    if (wasm.data_segments.getIndex(".tdata")) |data_index| {
        const segment_index = wasm.data_segments.entries.items(.value)[data_index];
        const segment = wasm.segments.items[segment_index];

        const param_local: u32 = 0;

        // Set `__tls_base` to the destination address passed in parameter 0.
        try writer.writeByte(std.wasm.opcode(.local_get));
        try leb.writeULEB128(writer, param_local);

        const tls_base_loc = wasm.findGlobalSymbol("__tls_base").?;
        try writer.writeByte(std.wasm.opcode(.global_set));
        try leb.writeULEB128(writer, tls_base_loc.getSymbol(wasm).index);

        // load stack values for the bulk-memory operation
        {
            try writer.writeByte(std.wasm.opcode(.local_get));
            try leb.writeULEB128(writer, param_local);

            try writer.writeByte(std.wasm.opcode(.i32_const));
            try leb.writeULEB128(writer, @as(u32, 0)); // offset into the source segment

            try writer.writeByte(std.wasm.opcode(.i32_const));
            try leb.writeULEB128(writer, @as(u32, segment.size)); // number of bytes to copy
        }

        // perform the bulk-memory operation to initialize the data segment
        try writer.writeByte(std.wasm.opcode(.misc_prefix));
        try leb.writeULEB128(writer, std.wasm.miscOpcode(.memory_init));
        // segment immediate
        try leb.writeULEB128(writer, @as(u32, @intCast(data_index)));
        // memory index immediate (always 0)
        try leb.writeULEB128(writer, @as(u32, 0));
    }

    // If we have to perform any TLS relocations, call the corresponding function
    // which performs all runtime TLS relocations. This is a synthetic function,
    // generated by the linker.
    if (wasm.findGlobalSymbol("__wasm_apply_global_tls_relocs")) |loc| {
        try writer.writeByte(std.wasm.opcode(.call));
        try leb.writeULEB128(writer, loc.getSymbol(wasm).index);
        loc.getSymbol(wasm).mark();
    }

    try writer.writeByte(std.wasm.opcode(.end));

    try wasm.createSyntheticFunction(
        "__wasm_init_tls",
        std.wasm.Type{ .params = &.{.i32}, .returns = &.{} },
        &function_body,
    );
}

/// Copies every live, import-requiring symbol's import into the linker's merged
/// import table (re-interning names in the linker's string table), then assigns
/// each imported symbol its final index within its per-kind index space.
fn setupImports(wasm: *Wasm) !void {
    const gpa = wasm.base.comp.gpa;
    log.debug("Merging imports", .{});
    for (wasm.resolved_symbols.keys()) |symbol_loc| {
        const obj_file = wasm.file(symbol_loc.file) orelse {
            // Synthetic symbols will already exist in the `import` section
            continue;
        };
        const symbol = symbol_loc.getSymbol(wasm);
        // The indirect function table is handled separately, not as a regular import.
        if (symbol.isDead() or
            !symbol.requiresImport() or
            std.mem.eql(u8, symbol_loc.getName(wasm), "__indirect_function_table"))
        {
            continue;
        }
        log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(wasm)});
        const import = obj_file.import(symbol_loc.index);
        // We copy the import to a new import to ensure the names contain references
        // to the internal string table, rather than of the object file.
        const new_imp: types.Import = .{
            .module_name = try wasm.string_table.put(gpa, obj_file.string(import.module_name)),
            .name = try wasm.string_table.put(gpa, obj_file.string(import.name)),
            .kind = import.kind,
        };
        // TODO: De-duplicate imports when they contain the same names and type
        try wasm.imports.putNoClobber(gpa, symbol_loc, new_imp);
    }

    // Assign all indexes of the imports to their representing symbols.
    // Imports precede definitions in each of wasm's per-kind index spaces.
    var function_index: u32 = 0;
    var global_index: u32 = 0;
    var table_index: u32 = 0;
    var it = wasm.imports.iterator();
    while (it.next()) |entry| {
        const symbol = entry.key_ptr.*.getSymbol(wasm);
        const import: types.Import = entry.value_ptr.*;
        switch (import.kind) {
            .function => {
                symbol.index = function_index;
                function_index += 1;
            },
            .global => {
                symbol.index = global_index;
                global_index += 1;
            },
            .table => {
                symbol.index = table_index;
                table_index += 1;
            },
            else => unreachable,
        }
    }
    wasm.imported_functions_count = function_index;
    wasm.imported_globals_count = global_index;
    wasm.imported_tables_count = table_index;
    log.debug("Merged ({d}) functions, ({d}) globals, and ({d}) tables into import section", .{
        function_index, global_index, table_index,
    });
}

/// Takes the global, function and table section from each linked object file
/// and merges it into a single section for each.
fn mergeSections(wasm: *Wasm) !void {
    const gpa = wasm.base.comp.gpa;

    var removed_duplicates = std.ArrayList(SymbolLoc).init(gpa);
    defer removed_duplicates.deinit();

    for (wasm.resolved_symbols.keys()) |sym_loc| {
        const obj_file = wasm.file(sym_loc.file) orelse {
            // Synthetic symbols already live in the corresponding sections.
            continue;
        };
        const symbol = obj_file.symbol(sym_loc.index);
        if (symbol.isDead() or symbol.isUndefined()) {
            // Skip undefined symbols as they go in the `import` section
            continue;
        }
        switch (symbol.tag) {
            .function => {
                const gop = try wasm.functions.getOrPut(
                    gpa,
                    .{ .file = sym_loc.file, .index = symbol.index },
                );
                if (gop.found_existing) {
                    // We found an alias to the same function, discard this symbol in favor of
                    // the original symbol and point the discard function to it. This ensures
                    // we only emit a single function, instead of duplicates.
                    // we favor keeping the global over a local.
                    const original_loc: SymbolLoc = .{ .file = gop.key_ptr.file, .index = gop.value_ptr.sym_index };
                    const original_sym = original_loc.getSymbol(wasm);
                    if (original_sym.isLocal() and symbol.isGlobal()) {
                        original_sym.unmark();
                        try wasm.discarded.put(gpa, original_loc, sym_loc);
                        try removed_duplicates.append(original_loc);
                    } else {
                        symbol.unmark();
                        try wasm.discarded.putNoClobber(gpa, sym_loc, original_loc);
                        try removed_duplicates.append(sym_loc);
                        continue;
                    }
                }
                gop.value_ptr.* = .{ .func = obj_file.function(sym_loc.index), .sym_index = sym_loc.index };
                symbol.index = @as(u32, @intCast(gop.index)) + wasm.imported_functions_count;
            },
            .global => {
                // Symbol indices live in per-kind index spaces (imports first, then
                // definitions), so a global must be de-biased by the object's
                // imported-*globals* count — not its imported-functions count.
                const index = symbol.index - obj_file.importedGlobals();
                const original_global = obj_file.globals()[index];
                symbol.index = @as(u32, @intCast(wasm.wasm_globals.items.len)) + wasm.imported_globals_count;
                try wasm.wasm_globals.append(gpa, original_global);
            },
            .table => {
                // De-bias within the table index space, mirroring the global case above.
                const index = symbol.index - obj_file.importedTables();
                // assert it's a regular relocatable object file as `ZigObject` will never
                // contain a table.
                const original_table = obj_file.object.tables[index];
                symbol.index = @as(u32, @intCast(wasm.tables.items.len)) + wasm.imported_tables_count;
                try wasm.tables.append(gpa, original_table);
            },
            .dead, .undefined => unreachable,
            else => {},
        }
    }

    // For any removed duplicates, remove them from the resolved symbols list
    for (removed_duplicates.items) |sym_loc| {
        assert(wasm.resolved_symbols.swapRemove(sym_loc));
        gc_log.debug("Removed duplicate for function '{s}'", .{sym_loc.getName(wasm)});
    }

    log.debug("Merged ({d}) functions", .{wasm.functions.count()});
    log.debug("Merged ({d}) globals", .{wasm.wasm_globals.items.len});
    log.debug("Merged ({d}) tables", .{wasm.tables.items.len});
}

/// Merges function types of all object files into the final
/// 'types' section, while assigning the type index to the representing
/// section (import, export, function).
fn mergeTypes(wasm: *Wasm) !void {
    const gpa = wasm.base.comp.gpa;
    // A map to track which functions have already had their
    // type inserted. If we do this for the same function multiple times,
    // it will be overwritten with the incorrect type.
    var dirty = std.AutoHashMap(u32, void).init(gpa);
    defer dirty.deinit();
    try dirty.ensureUnusedCapacity(@as(u32, @intCast(wasm.functions.count())));

    for (wasm.resolved_symbols.keys()) |sym_loc| {
        const obj_file = wasm.file(sym_loc.file) orelse {
            // zig code-generated symbols are already present in final type section
            continue;
        };
        const symbol = obj_file.symbol(sym_loc.index);
        if (symbol.tag != .function or symbol.isDead()) {
            // Only functions have types. Only retrieve the type of referenced functions.
            continue;
        }
        if (symbol.isUndefined()) {
            log.debug("Adding type from extern function '{s}'", .{sym_loc.getName(wasm)});
            const import: *types.Import = wasm.imports.getPtr(sym_loc) orelse continue;
            const original_type = obj_file.funcTypes()[import.kind.function];
            import.kind.function = try wasm.putOrGetFuncType(original_type);
        } else if (!dirty.contains(symbol.index)) {
            log.debug("Adding type from function '{s}'", .{sym_loc.getName(wasm)});
            const func = &wasm.functions.values()[symbol.index - wasm.imported_functions_count].func;
            func.type_index = try wasm.putOrGetFuncType(obj_file.funcTypes()[func.type_index]);
            dirty.putAssumeCapacityNoClobber(symbol.index, {});
        }
    }
    log.debug("Completed merging and deduplicating types. Total count: ({d})", .{wasm.func_types.items.len});
}

/// Flags every symbol named in `--export-symbol-names` as exported, reporting a
/// link error (and eventually `error.FlushFailure`) for any name that cannot be found.
fn checkExportNames(wasm: *Wasm) !void {
    const force_exp_names = wasm.export_symbol_names;
    if (force_exp_names.len > 0) {
        var failed_exports = false;

        for (force_exp_names) |exp_name| {
            const loc = wasm.findGlobalSymbol(exp_name) orelse {
                try wasm.addErrorWithoutNotes("could not export '{s}', symbol not found", .{exp_name});
                failed_exports = true;
                continue;
            };

            const symbol = loc.getSymbol(wasm);
            symbol.setFlag(.WASM_SYM_EXPORTED);
        }

        if (failed_exports) {
            return error.FlushFailure;
        }
    }
}

/// Builds the final `export` section entries from all exported resolved symbols.
/// Data symbols are exported indirectly: an immutable i32 global holding the
/// symbol's virtual address is synthesized and exported in its place.
fn setupExports(wasm: *Wasm) !void {
    const comp = wasm.base.comp;
    const gpa = comp.gpa;
    if (comp.config.output_mode == .Obj) return;
    log.debug("Building exports from symbols", .{});

    for (wasm.resolved_symbols.keys()) |sym_loc| {
        const symbol = sym_loc.getSymbol(wasm);
        if (!symbol.isExported(comp.config.rdynamic)) continue;

        const sym_name = sym_loc.getName(wasm);
        // Synthetic symbols (file == .null) already intern their name in the
        // linker's string table; object-file names must be re-interned.
        const export_name = if (sym_loc.file == .null) symbol.name else try wasm.string_table.put(gpa, sym_name);
        const exp: types.Export = if (symbol.tag == .data) exp: {
            const global_index = @as(u32, @intCast(wasm.imported_globals_count + wasm.wasm_globals.items.len));
            try wasm.wasm_globals.append(gpa, .{
                .global_type = .{ .valtype = .i32, .mutable = false },
                .init = .{ .i32_const = @as(i32, @intCast(symbol.virtual_address)) },
            });
            break :exp .{
                .name = export_name,
                .kind = .global,
                .index = global_index,
            };
        } else .{
            .name = export_name,
            .kind = symbol.tag.externalType(),
            .index = symbol.index,
        };
        log.debug("Exporting symbol '{s}' as '{s}' at index: ({d})", .{
            sym_name, wasm.string_table.get(exp.name), exp.index,
        });
        try wasm.exports.append(gpa, exp);
    }
    log.debug("Completed building exports. Total count: ({d})", .{wasm.exports.items.len});
}

/// Validates the configured entry symbol (if any): it must exist and be a
/// function. Ensures it is exported for non-object outputs.
fn setupStart(wasm: *Wasm) !void {
    const comp = wasm.base.comp;
    // do not export entry point if user set none or no default was set.
    const entry_name = wasm.entry_name orelse return;

    const symbol_loc = wasm.findGlobalSymbol(entry_name) orelse {
        try wasm.addErrorWithoutNotes("Entry symbol '{s}' missing, use '-fno-entry' to suppress", .{entry_name});
        return error.FlushFailure;
    };

    const symbol = symbol_loc.getSymbol(wasm);
    if (symbol.tag != .function) {
        try wasm.addErrorWithoutNotes("Entry symbol '{s}' is not a function", .{entry_name});
        return error.FlushFailure;
    }

    // Ensure the symbol is exported so host environment can access it
    if (comp.config.output_mode != .Obj) {
        symbol.setFlag(.WASM_SYM_EXPORTED);
    }
}

/// Sets up the memory section of the wasm module, as well as the stack.
fn setupMemory(wasm: *Wasm) !void {
    const comp = wasm.base.comp;
    const shared_memory = comp.config.shared_memory;
    log.debug("Setting up memory layout", .{});
    const page_size = std.wasm.page_size; // 64kb
    const stack_alignment: Alignment = .@"16"; // wasm's stack alignment as specified by tool-convention
    const heap_alignment: Alignment = .@"16"; // wasm's heap alignment as specified by tool-convention

    // Always place the stack at the start by default
    // unless the user specified the global-base flag
    var place_stack_first = true;
    var memory_ptr: u64 = if (wasm.global_base) |base| blk: {
        place_stack_first = false;
        break :blk base;
    } else 0;

    const is_obj = comp.config.output_mode == .Obj;

    // Index of the stack-pointer global within the defined-globals list, if present.
    const stack_ptr = if (wasm.findGlobalSymbol("__stack_pointer")) |loc| index: {
        const sym = loc.getSymbol(wasm);
        break :index sym.index - wasm.imported_globals_count;
    } else null;

    if (place_stack_first and !is_obj) {
        memory_ptr = stack_alignment.forward(memory_ptr);
        memory_ptr += wasm.base.stack_size;
        // We always put the stack pointer global at index 0
        if (stack_ptr) |index| {
            wasm.wasm_globals.items[index].init.i32_const = @as(i32, @bitCast(@as(u32, @intCast(memory_ptr))));
        }
    }

    // Lay out all data segments, assigning each its aligned offset.
    var offset: u32 = @as(u32, @intCast(memory_ptr));
    var data_seg_it = wasm.data_segments.iterator();
    while (data_seg_it.next()) |entry| {
        const segment = &wasm.segments.items[entry.value_ptr.*];
        memory_ptr = segment.alignment.forward(memory_ptr);

        // set TLS-related symbols
        if (mem.eql(u8, entry.key_ptr.*, ".tdata")) {
            if (wasm.findGlobalSymbol("__tls_size")) |loc| {
                const sym = loc.getSymbol(wasm);
                wasm.wasm_globals.items[sym.index - wasm.imported_globals_count].init.i32_const = @intCast(segment.size);
            }
            if (wasm.findGlobalSymbol("__tls_align")) |loc| {
                const sym = loc.getSymbol(wasm);
                wasm.wasm_globals.items[sym.index - wasm.imported_globals_count].init.i32_const = @intCast(segment.alignment.toByteUnitsOptional().?);
            }
            if (wasm.findGlobalSymbol("__tls_base")) |loc| {
                const sym = loc.getSymbol(wasm);
                // With shared memory each thread sets __tls_base at runtime
                // (via __wasm_init_tls); single-threaded gets a fixed address.
                wasm.wasm_globals.items[sym.index - wasm.imported_globals_count].init.i32_const = if (shared_memory)
                    @as(i32, 0)
                else
                    @as(i32, @intCast(memory_ptr));
            }
        }

        memory_ptr += segment.size;
        segment.offset = offset;
        offset += segment.size;
    }

    // create the memory init flag which is used by the init memory function
    if (shared_memory and wasm.hasPassiveInitializationSegments()) {
        // align to pointer size
        memory_ptr = mem.alignForward(u64, memory_ptr, 4);
        const loc = try wasm.createSyntheticSymbol("__wasm_init_memory_flag", .data);
        const sym = loc.getSymbol(wasm);
        sym.mark();
        sym.virtual_address = @as(u32, @intCast(memory_ptr));
        memory_ptr += 4;
    }

    if (!place_stack_first and !is_obj) {
        memory_ptr = stack_alignment.forward(memory_ptr);
        memory_ptr += wasm.base.stack_size;
        if (stack_ptr) |index| {
            wasm.wasm_globals.items[index].init.i32_const = @as(i32, @bitCast(@as(u32, @intCast(memory_ptr))));
        }
    }

    // One of the linked object files has a reference to the __heap_base symbol.
    // We must set its virtual address so it can be used in relocations.
    if (wasm.findGlobalSymbol("__heap_base")) |loc| {
        const symbol = loc.getSymbol(wasm);
        symbol.virtual_address = @intCast(heap_alignment.forward(memory_ptr));
    }

    // Setup the max amount of pages
    // For now we only support wasm32 by setting the maximum allowed memory size 2^32-1
    const max_memory_allowed: u64 = (1 << 32) - 1;

    if (wasm.initial_memory) |initial_memory| {
        if (!std.mem.isAlignedGeneric(u64, initial_memory, page_size)) {
            try wasm.addErrorWithoutNotes("Initial memory must be {d}-byte aligned", .{page_size});
        }
        if (memory_ptr > initial_memory) {
            try wasm.addErrorWithoutNotes("Initial memory too small, must be at least {d} bytes", .{memory_ptr});
        }
        if (initial_memory > max_memory_allowed) {
            try wasm.addErrorWithoutNotes("Initial memory exceeds maximum memory {d}", .{max_memory_allowed});
        }
        memory_ptr = initial_memory;
    }
    // Round up to a whole number of pages.
    memory_ptr = mem.alignForward(u64, memory_ptr, std.wasm.page_size);

    // In case we do not import memory, but define it ourselves,
    // set the minimum amount of pages on the memory section.
    wasm.memories.limits.min = @as(u32, @intCast(memory_ptr / page_size));
    log.debug("Total memory pages: {d}", .{wasm.memories.limits.min});

    if (wasm.findGlobalSymbol("__heap_end")) |loc| {
        const symbol = loc.getSymbol(wasm);
        symbol.virtual_address = @as(u32, @intCast(memory_ptr));
    }

    if (wasm.max_memory) |max_memory| {
        if (!std.mem.isAlignedGeneric(u64, max_memory, page_size)) {
            try wasm.addErrorWithoutNotes("Maximum memory must be {d}-byte aligned", .{page_size});
        }
        if (memory_ptr > max_memory) {
            // Fixed typo in diagnostic: "Maxmimum" -> "Maximum".
            try wasm.addErrorWithoutNotes("Maximum memory too small, must be at least {d} bytes", .{memory_ptr});
        }
        if (max_memory > max_memory_allowed) {
            // Fixed typo in diagnostic: "maxmium" -> "maximum".
            try wasm.addErrorWithoutNotes("Maximum memory exceeds maximum amount {d}", .{max_memory_allowed});
        }
        wasm.memories.limits.max = @as(u32, @intCast(max_memory / page_size));
        wasm.memories.limits.setFlag(.WASM_LIMITS_FLAG_HAS_MAX);
        if (shared_memory) {
            wasm.memories.limits.setFlag(.WASM_LIMITS_FLAG_IS_SHARED);
        }
        log.debug("Maximum memory pages: {?d}", .{wasm.memories.limits.max});
    }
}

/// From a given object's index and the index of the segment, returns the corresponding
/// index of the segment within the final data section. When the segment does not yet
/// exist, a new one will be initialized and appended. The new index will be returned in that case.
pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: Symbol.Index) !u32 {
    const comp = wasm.base.comp;
    const gpa = comp.gpa;
    const obj_file = wasm.file(file_index).?;
    const symbol = obj_file.symbols()[@intFromEnum(symbol_index)];
    // Speculative index of a newly-created segment; only used when no
    // existing segment matches below.
    const index: u32 = @intCast(wasm.segments.items.len);
    const shared_memory = comp.config.shared_memory;

    switch (symbol.tag) {
        .data => {
            const segment_info = obj_file.segmentInfo()[symbol.index];
            // For final binaries (non-object output), input segments are merged
            // under a canonical output name (e.g. `.data`).
            const merge_segment = comp.config.output_mode != .Obj;
            const result = try wasm.data_segments.getOrPut(gpa, segment_info.outputName(merge_segment));
            if (!result.found_existing) {
                result.value_ptr.* = index;
                var flags: u32 = 0;
                if (shared_memory) {
                    // Shared memory requires passive segments, initialized at runtime.
                    flags |= @intFromEnum(Segment.Flag.WASM_DATA_SEGMENT_IS_PASSIVE);
                }
                try wasm.segments.append(gpa, .{
                    .alignment = .@"1",
                    .size = 0,
                    .offset = 0,
                    .flags = flags,
                });
                try wasm.segment_info.putNoClobber(gpa, index, .{
                    .name = try gpa.dupe(u8, segment_info.name),
                    .alignment = segment_info.alignment,
                    .flags = segment_info.flags,
                });
                return index;
            } else return result.value_ptr.*;
        },
        // All functions share the single code-section segment; create it lazily.
        .function => return wasm.code_section_index orelse blk: {
            wasm.code_section_index = index;
            try wasm.appendDummySegment();
            break :blk index;
        },
        .section => {
            // Custom (DWARF) sections: one lazily-created segment per section name.
            const section_name = obj_file.symbolName(symbol_index);
            if (mem.eql(u8, section_name, ".debug_info")) {
                return wasm.debug_info_index orelse blk: {
                    wasm.debug_info_index = index;
                    try wasm.appendDummySegment();
                    break :blk index;
                };
            } else if (mem.eql(u8, section_name, ".debug_line")) {
                return wasm.debug_line_index orelse blk: {
                    wasm.debug_line_index = index;
                    try wasm.appendDummySegment();
                    break :blk index;
                };
            } else if (mem.eql(u8, section_name, ".debug_loc")) {
                return wasm.debug_loc_index orelse blk: {
                    wasm.debug_loc_index = index;
                    try wasm.appendDummySegment();
                    break :blk index;
                };
            } else if (mem.eql(u8, section_name, ".debug_ranges")) {
                return wasm.debug_ranges_index orelse blk: {
                    wasm.debug_ranges_index = index;
                    try wasm.appendDummySegment();
                    break :blk index;
                };
            } else if (mem.eql(u8, section_name, ".debug_pubnames")) {
                return wasm.debug_pubnames_index orelse blk: {
                    wasm.debug_pubnames_index = index;
                    try wasm.appendDummySegment();
                    break :blk index;
                };
            } else if (mem.eql(u8, section_name, ".debug_pubtypes")) {
                return wasm.debug_pubtypes_index orelse blk: {
                    wasm.debug_pubtypes_index = index;
                    try wasm.appendDummySegment();
                    break :blk index;
                };
            } else if (mem.eql(u8, section_name, ".debug_abbrev")) {
                return wasm.debug_abbrev_index orelse blk: {
                    wasm.debug_abbrev_index = index;
                    try wasm.appendDummySegment();
                    break :blk index;
                };
            } else if (mem.eql(u8, section_name, ".debug_str")) {
                return wasm.debug_str_index orelse blk: {
                    wasm.debug_str_index = index;
                    try wasm.appendDummySegment();
                    break :blk index;
                };
            } else {
                var err = try wasm.addErrorWithNotes(1);
                try err.addMsg(wasm, "found unknown section '{s}'", .{section_name});
                try err.addNote(wasm, "defined in '{s}'", .{obj_file.path()});
                return error.UnexpectedValue;
            }
        },
        else => unreachable,
    }
}

/// Appends a new segment with default field values
fn appendDummySegment(wasm: *Wasm) !void {
    const gpa = wasm.base.comp.gpa;
    try wasm.segments.append(gpa, .{
        .alignment = .@"1",
        .size = 0,
        .offset = 0,
        .flags = 0,
    });
}

/// Top-level flush entry point: dispatches to LLD when it is in use,
/// otherwise to the self-hosted `flushModule` path.
pub fn flush(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void {
    const comp = wasm.base.comp;
    const use_lld = build_options.have_llvm and comp.config.use_lld;
    if (use_lld) {
        return wasm.linkWithLLD(arena, prog_node);
    }
    return wasm.flushModule(arena, prog_node);
}

/// Uses the in-house linker to link one or multiple object -and archive files into a WebAssembly binary.
pub fn flushModule(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void {
    const tracy = trace(@src());
    defer tracy.end();

    const comp = wasm.base.comp;
    if (wasm.llvm_object) |llvm_object| {
        try wasm.base.emitLlvmObject(arena, llvm_object, prog_node);
        const use_lld = build_options.have_llvm and comp.config.use_lld;
        if (use_lld) return;
    }

    var sub_prog_node = prog_node.start("Wasm Flush", 0);
    sub_prog_node.activate();
    defer sub_prog_node.end();

    const directory = wasm.base.emit.directory; // Just an alias to make it shorter to type.
    const full_out_path = try directory.join(arena, &[_][]const u8{wasm.base.emit.sub_path});
    // Path of the object file produced for the Zcu (if any), placed next to the output.
    const module_obj_path: ?[]const u8 = if (wasm.base.zcu_object_sub_path) |path| blk: {
        if (fs.path.dirname(full_out_path)) |dirname| {
            break :blk try fs.path.join(arena, &.{ dirname, path });
        } else {
            break :blk path;
        }
    } else null;

    // Positional arguments to the linker such as object files and static archives.
    var positionals = std.ArrayList([]const u8).init(arena);
    try positionals.ensureUnusedCapacity(comp.objects.len);

    const target = comp.root_mod.resolved_target.result;
    const output_mode = comp.config.output_mode;
    const link_mode = comp.config.link_mode;
    const link_libc = comp.config.link_libc;
    const link_libcpp = comp.config.link_libcpp;
    const wasi_exec_model = comp.config.wasi_exec_model;

    if (wasm.zigObjectPtr()) |zig_object| {
        try zig_object.flushModule(wasm);
    }

    // When the target os is WASI, we allow linking with WASI-LIBC
    if (target.os.tag == .wasi) {
        const is_exe_or_dyn_lib = output_mode == .Exe or
            (output_mode == .Lib and link_mode == .dynamic);
        if (is_exe_or_dyn_lib) {
            for (comp.wasi_emulated_libs) |crt_file| {
                try positionals.append(try comp.get_libc_crt_file(
                    arena,
                    wasi_libc.emulatedLibCRFileLibName(crt_file),
                ));
            }

            if (link_libc) {
                try positionals.append(try comp.get_libc_crt_file(
                    arena,
                    wasi_libc.execModelCrtFileFullName(wasi_exec_model),
                ));
                try positionals.append(try comp.get_libc_crt_file(arena, "libc.a"));
            }

            if (link_libcpp) {
                try positionals.append(comp.libcxx_static_lib.?.full_object_path);
                try positionals.append(comp.libcxxabi_static_lib.?.full_object_path);
            }
        }
    }

    if (module_obj_path) |path| {
        try positionals.append(path);
    }

    for (comp.objects) |object| {
        try positionals.append(object.path);
    }

    for (comp.c_object_table.keys()) |c_object| {
        try positionals.append(c_object.status.success.object_path);
    }

    if (comp.compiler_rt_lib) |lib| try positionals.append(lib.full_object_path);
    if (comp.compiler_rt_obj) |obj| try positionals.append(obj.full_object_path);

    try wasm.parseInputFiles(positionals.items);

    // Symbol resolution: the ZigObject first, then each relocatable object.
    // Link errors are checked between phases so we bail out early.
    if (wasm.zig_object_index != .null) {
        try wasm.resolveSymbolsInObject(wasm.zig_object_index);
    }
    if (comp.link_errors.items.len > 0) return error.FlushFailure;

    for (wasm.objects.items) |object_index| {
        try wasm.resolveSymbolsInObject(object_index);
    }
    if (comp.link_errors.items.len > 0) return error.FlushFailure;

    var emit_features_count: u32 = 0;
    var enabled_features: [@typeInfo(types.Feature.Tag).Enum.fields.len]bool = undefined;
    try wasm.validateFeatures(&enabled_features, &emit_features_count);
    try wasm.resolveSymbolsInArchives();
    if (comp.link_errors.items.len > 0) return error.FlushFailure;
    try wasm.resolveLazySymbols();
    try wasm.checkUndefinedSymbols();
    try wasm.checkExportNames();

    // Constructors must be collected before any sections are merged
    // (see `setupInitFunctions`).
    try wasm.setupInitFunctions();
    if (comp.link_errors.items.len > 0) return error.FlushFailure;
    try wasm.setupStart();
    try wasm.markReferences();
    try wasm.setupImports();
    try wasm.mergeSections();
    try wasm.mergeTypes();
    try wasm.allocateAtoms();
    try wasm.setupMemory();
    if (comp.link_errors.items.len > 0) return error.FlushFailure;
    wasm.allocateVirtualAddresses();
    wasm.mapFunctionTable();
    // Synthetic functions are created only after all sections have been merged,
    // so the indices they embed are final.
    try wasm.initializeCallCtorsFunction();
    try wasm.setupInitMemoryFunction();
    try wasm.setupTLSRelocationsFunction();
    try wasm.initializeTLSFunction();
    try wasm.setupStartSection();
    try wasm.setupExports();

    try wasm.writeToFile(enabled_features, emit_features_count, arena);
    if
(comp.link_errors.items.len > 0) return error.FlushFailure; } /// Writes the WebAssembly in-memory module to the file fn writeToFile( wasm: *Wasm, enabled_features: [@typeInfo(types.Feature.Tag).Enum.fields.len]bool, feature_count: u32, arena: Allocator, ) !void { const comp = wasm.base.comp; const gpa = comp.gpa; const use_llvm = comp.config.use_llvm; const use_lld = build_options.have_llvm and comp.config.use_lld; const shared_memory = comp.config.shared_memory; const import_memory = comp.config.import_memory; const export_memory = comp.config.export_memory; // Size of each section header const header_size = 5 + 1; // The amount of sections that will be written var section_count: u32 = 0; // Index of the code section. Used to tell relocation table where the section lives. var code_section_index: ?u32 = null; // Index of the data section. Used to tell relocation table where the section lives. var data_section_index: ?u32 = null; const is_obj = comp.config.output_mode == .Obj or (!use_llvm and use_lld); var binary_bytes = std.ArrayList(u8).init(gpa); defer binary_bytes.deinit(); const binary_writer = binary_bytes.writer(); // We write the magic bytes at the end so they will only be written // if everything succeeded as expected. So populate with 0's for now. try binary_writer.writeAll(&[_]u8{0} ** 8); // (Re)set file pointer to 0 try wasm.base.file.?.setEndPos(0); try wasm.base.file.?.seekTo(0); // Type section if (wasm.func_types.items.len != 0) { const header_offset = try reserveVecSectionHeader(&binary_bytes); log.debug("Writing type section. 
Count: ({d})", .{wasm.func_types.items.len}); for (wasm.func_types.items) |func_type| { try leb.writeULEB128(binary_writer, std.wasm.function_type); try leb.writeULEB128(binary_writer, @as(u32, @intCast(func_type.params.len))); for (func_type.params) |param_ty| { try leb.writeULEB128(binary_writer, std.wasm.valtype(param_ty)); } try leb.writeULEB128(binary_writer, @as(u32, @intCast(func_type.returns.len))); for (func_type.returns) |ret_ty| { try leb.writeULEB128(binary_writer, std.wasm.valtype(ret_ty)); } } try writeVecSectionHeader( binary_bytes.items, header_offset, .type, @intCast(binary_bytes.items.len - header_offset - header_size), @intCast(wasm.func_types.items.len), ); section_count += 1; } // Import section if (wasm.imports.count() != 0 or import_memory) { const header_offset = try reserveVecSectionHeader(&binary_bytes); var it = wasm.imports.iterator(); while (it.next()) |entry| { assert(entry.key_ptr.*.getSymbol(wasm).isUndefined()); const import = entry.value_ptr.*; try wasm.emitImport(binary_writer, import); } if (import_memory) { const mem_name = if (is_obj) "__linear_memory" else "memory"; const mem_imp: types.Import = .{ .module_name = try wasm.string_table.put(gpa, wasm.host_name), .name = try wasm.string_table.put(gpa, mem_name), .kind = .{ .memory = wasm.memories.limits }, }; try wasm.emitImport(binary_writer, mem_imp); } try writeVecSectionHeader( binary_bytes.items, header_offset, .import, @intCast(binary_bytes.items.len - header_offset - header_size), @intCast(wasm.imports.count() + @intFromBool(import_memory)), ); section_count += 1; } // Function section if (wasm.functions.count() != 0) { const header_offset = try reserveVecSectionHeader(&binary_bytes); for (wasm.functions.values()) |function| { try leb.writeULEB128(binary_writer, function.func.type_index); } try writeVecSectionHeader( binary_bytes.items, header_offset, .function, @intCast(binary_bytes.items.len - header_offset - header_size), @intCast(wasm.functions.count()), ); 
section_count += 1; } // Table section if (wasm.tables.items.len > 0) { const header_offset = try reserveVecSectionHeader(&binary_bytes); for (wasm.tables.items) |table| { try leb.writeULEB128(binary_writer, std.wasm.reftype(table.reftype)); try emitLimits(binary_writer, table.limits); } try writeVecSectionHeader( binary_bytes.items, header_offset, .table, @intCast(binary_bytes.items.len - header_offset - header_size), @intCast(wasm.tables.items.len), ); section_count += 1; } // Memory section if (!import_memory) { const header_offset = try reserveVecSectionHeader(&binary_bytes); try emitLimits(binary_writer, wasm.memories.limits); try writeVecSectionHeader( binary_bytes.items, header_offset, .memory, @intCast(binary_bytes.items.len - header_offset - header_size), 1, // wasm currently only supports 1 linear memory segment ); section_count += 1; } // Global section (used to emit stack pointer) if (wasm.wasm_globals.items.len > 0) { const header_offset = try reserveVecSectionHeader(&binary_bytes); for (wasm.wasm_globals.items) |global| { try binary_writer.writeByte(std.wasm.valtype(global.global_type.valtype)); try binary_writer.writeByte(@intFromBool(global.global_type.mutable)); try emitInit(binary_writer, global.init); } try writeVecSectionHeader( binary_bytes.items, header_offset, .global, @intCast(binary_bytes.items.len - header_offset - header_size), @intCast(wasm.wasm_globals.items.len), ); section_count += 1; } // Export section if (wasm.exports.items.len != 0 or export_memory) { const header_offset = try reserveVecSectionHeader(&binary_bytes); for (wasm.exports.items) |exp| { const name = wasm.string_table.get(exp.name); try leb.writeULEB128(binary_writer, @as(u32, @intCast(name.len))); try binary_writer.writeAll(name); try leb.writeULEB128(binary_writer, @intFromEnum(exp.kind)); try leb.writeULEB128(binary_writer, exp.index); } if (export_memory) { try leb.writeULEB128(binary_writer, @as(u32, @intCast("memory".len))); try binary_writer.writeAll("memory"); 
try binary_writer.writeByte(std.wasm.externalKind(.memory)); try leb.writeULEB128(binary_writer, @as(u32, 0)); } try writeVecSectionHeader( binary_bytes.items, header_offset, .@"export", @intCast(binary_bytes.items.len - header_offset - header_size), @intCast(wasm.exports.items.len + @intFromBool(export_memory)), ); section_count += 1; } if (wasm.entry) |entry_index| { const header_offset = try reserveVecSectionHeader(&binary_bytes); try writeVecSectionHeader( binary_bytes.items, header_offset, .start, @intCast(binary_bytes.items.len - header_offset - header_size), entry_index, ); } // element section (function table) if (wasm.function_table.count() > 0) { const header_offset = try reserveVecSectionHeader(&binary_bytes); const table_loc = wasm.findGlobalSymbol("__indirect_function_table").?; const table_sym = table_loc.getSymbol(wasm); const flags: u32 = if (table_sym.index == 0) 0x0 else 0x02; // passive with implicit 0-index table or set table index manually try leb.writeULEB128(binary_writer, flags); if (flags == 0x02) { try leb.writeULEB128(binary_writer, table_sym.index); } try emitInit(binary_writer, .{ .i32_const = 1 }); // We start at index 1, so unresolved function pointers are invalid if (flags == 0x02) { try leb.writeULEB128(binary_writer, @as(u8, 0)); // represents funcref } try leb.writeULEB128(binary_writer, @as(u32, @intCast(wasm.function_table.count()))); var symbol_it = wasm.function_table.keyIterator(); while (symbol_it.next()) |symbol_loc_ptr| { const sym = symbol_loc_ptr.getSymbol(wasm); std.debug.assert(sym.isAlive()); std.debug.assert(sym.index < wasm.functions.count() + wasm.imported_functions_count); try leb.writeULEB128(binary_writer, sym.index); } try writeVecSectionHeader( binary_bytes.items, header_offset, .element, @intCast(binary_bytes.items.len - header_offset - header_size), 1, ); section_count += 1; } // When the shared-memory option is enabled, we *must* emit the 'data count' section. 
const data_segments_count = wasm.data_segments.count() - @intFromBool(wasm.data_segments.contains(".bss") and !import_memory); if (data_segments_count != 0 and shared_memory) { const header_offset = try reserveVecSectionHeader(&binary_bytes); try writeVecSectionHeader( binary_bytes.items, header_offset, .data_count, @intCast(binary_bytes.items.len - header_offset - header_size), @intCast(data_segments_count), ); } // Code section if (wasm.code_section_index != null) { const header_offset = try reserveVecSectionHeader(&binary_bytes); const start_offset = binary_bytes.items.len - 5; // minus 5 so start offset is 5 to include entry count var func_it = wasm.functions.iterator(); while (func_it.next()) |entry| { const sym_loc: SymbolLoc = .{ .index = entry.value_ptr.sym_index, .file = entry.key_ptr.file }; const atom_index = wasm.symbol_atom.get(sym_loc).?; const atom = wasm.getAtomPtr(atom_index); if (!is_obj) { atom.resolveRelocs(wasm); } atom.offset = @intCast(binary_bytes.items.len - start_offset); try leb.writeULEB128(binary_writer, atom.size); try binary_writer.writeAll(atom.code.items); } try writeVecSectionHeader( binary_bytes.items, header_offset, .code, @intCast(binary_bytes.items.len - header_offset - header_size), @intCast(wasm.functions.count()), ); code_section_index = section_count; section_count += 1; } // Data section if (data_segments_count != 0) { const header_offset = try reserveVecSectionHeader(&binary_bytes); var it = wasm.data_segments.iterator(); var segment_count: u32 = 0; while (it.next()) |entry| { // do not output 'bss' section unless we import memory and therefore // want to guarantee the data is zero initialized if (!import_memory and std.mem.eql(u8, entry.key_ptr.*, ".bss")) continue; const segment_index = entry.value_ptr.*; const segment = wasm.segments.items[segment_index]; if (segment.size == 0) continue; // do not emit empty segments segment_count += 1; var atom_index = wasm.atoms.get(segment_index).?; try 
leb.writeULEB128(binary_writer, segment.flags); if (segment.flags & @intFromEnum(Wasm.Segment.Flag.WASM_DATA_SEGMENT_HAS_MEMINDEX) != 0) { try leb.writeULEB128(binary_writer, @as(u32, 0)); // memory is always index 0 as we only have 1 memory entry } // when a segment is passive, it's initialized during runtime. if (!segment.isPassive()) { try emitInit(binary_writer, .{ .i32_const = @as(i32, @bitCast(segment.offset)) }); } // offset into data section try leb.writeULEB128(binary_writer, segment.size); // fill in the offset table and the data segments var current_offset: u32 = 0; while (true) { const atom = wasm.getAtomPtr(atom_index); if (!is_obj) { atom.resolveRelocs(wasm); } // Pad with zeroes to ensure all segments are aligned if (current_offset != atom.offset) { const diff = atom.offset - current_offset; try binary_writer.writeByteNTimes(0, diff); current_offset += diff; } assert(current_offset == atom.offset); assert(atom.code.items.len == atom.size); try binary_writer.writeAll(atom.code.items); current_offset += atom.size; if (atom.prev != .null) { atom_index = atom.prev; } else { // also pad with zeroes when last atom to ensure // segments are aligned. if (current_offset != segment.size) { try binary_writer.writeByteNTimes(0, segment.size - current_offset); current_offset += segment.size - current_offset; } break; } } assert(current_offset == segment.size); } try writeVecSectionHeader( binary_bytes.items, header_offset, .data, @intCast(binary_bytes.items.len - header_offset - header_size), @intCast(segment_count), ); data_section_index = section_count; section_count += 1; } if (is_obj) { // relocations need to point to the index of a symbol in the final symbol table. To save memory, // we never store all symbols in a single table, but store a location reference instead. // This means that for a relocatable object file, we need to generate one and provide it to the relocation sections. 
var symbol_table = std.AutoArrayHashMap(SymbolLoc, u32).init(arena); try wasm.emitLinkSection(&binary_bytes, &symbol_table); if (code_section_index) |code_index| { try wasm.emitCodeRelocations(&binary_bytes, code_index, symbol_table); } if (data_section_index) |data_index| { try wasm.emitDataRelocations(&binary_bytes, data_index, symbol_table); } } else if (comp.config.debug_format != .strip) { try wasm.emitNameSection(&binary_bytes, arena); } if (comp.config.debug_format != .strip) { // The build id must be computed on the main sections only, // so we have to do it now, before the debug sections. switch (wasm.base.build_id) { .none => {}, .fast => { var id: [16]u8 = undefined; std.crypto.hash.sha3.TurboShake128(null).hash(binary_bytes.items, &id, .{}); var uuid: [36]u8 = undefined; _ = try std.fmt.bufPrint(&uuid, "{s}-{s}-{s}-{s}-{s}", .{ std.fmt.fmtSliceHexLower(id[0..4]), std.fmt.fmtSliceHexLower(id[4..6]), std.fmt.fmtSliceHexLower(id[6..8]), std.fmt.fmtSliceHexLower(id[8..10]), std.fmt.fmtSliceHexLower(id[10..]), }); try emitBuildIdSection(&binary_bytes, &uuid); }, .hexstring => |hs| { var buffer: [32 * 2]u8 = undefined; const str = std.fmt.bufPrint(&buffer, "{s}", .{ std.fmt.fmtSliceHexLower(hs.toSlice()), }) catch unreachable; try emitBuildIdSection(&binary_bytes, str); }, else => |mode| try wasm.addErrorWithoutNotes("build-id '{s}' is not supported for WebAssembly", .{@tagName(mode)}), } var debug_bytes = std.ArrayList(u8).init(gpa); defer debug_bytes.deinit(); const DebugSection = struct { name: []const u8, index: ?u32, }; const debug_sections: []const DebugSection = &.{ .{ .name = ".debug_info", .index = wasm.debug_info_index }, .{ .name = ".debug_pubtypes", .index = wasm.debug_pubtypes_index }, .{ .name = ".debug_abbrev", .index = wasm.debug_abbrev_index }, .{ .name = ".debug_line", .index = wasm.debug_line_index }, .{ .name = ".debug_str", .index = wasm.debug_str_index }, .{ .name = ".debug_pubnames", .index = wasm.debug_pubnames_index }, .{ .name = 
".debug_loc", .index = wasm.debug_loc_index }, .{ .name = ".debug_ranges", .index = wasm.debug_ranges_index }, }; for (debug_sections) |item| { if (item.index) |index| { var atom = wasm.getAtomPtr(wasm.atoms.get(index).?); while (true) { atom.resolveRelocs(wasm); try debug_bytes.appendSlice(atom.code.items); if (atom.prev == .null) break; atom = wasm.getAtomPtr(atom.prev); } try emitDebugSection(&binary_bytes, debug_bytes.items, item.name); debug_bytes.clearRetainingCapacity(); } } try emitProducerSection(&binary_bytes); if (feature_count > 0) { try emitFeaturesSection(&binary_bytes, &enabled_features, feature_count); } } // Only when writing all sections executed properly we write the magic // bytes. This allows us to easily detect what went wrong while generating // the final binary. { const src = std.wasm.magic ++ std.wasm.version; binary_bytes.items[0..src.len].* = src; } // finally, write the entire binary into the file. var iovec = [_]std.os.iovec_const{.{ .iov_base = binary_bytes.items.ptr, .iov_len = binary_bytes.items.len, }}; try wasm.base.file.?.writevAll(&iovec); } fn emitDebugSection(binary_bytes: *std.ArrayList(u8), data: []const u8, name: []const u8) !void { if (data.len == 0) return; const header_offset = try reserveCustomSectionHeader(binary_bytes); const writer = binary_bytes.writer(); try leb.writeULEB128(writer, @as(u32, @intCast(name.len))); try writer.writeAll(name); const start = binary_bytes.items.len - header_offset; log.debug("Emit debug section: '{s}' start=0x{x:0>8} end=0x{x:0>8}", .{ name, start, start + data.len }); try writer.writeAll(data); try writeCustomSectionHeader( binary_bytes.items, header_offset, @as(u32, @intCast(binary_bytes.items.len - header_offset - 6)), ); } fn emitProducerSection(binary_bytes: *std.ArrayList(u8)) !void { const header_offset = try reserveCustomSectionHeader(binary_bytes); const writer = binary_bytes.writer(); const producers = "producers"; try leb.writeULEB128(writer, @as(u32, @intCast(producers.len))); 
try writer.writeAll(producers); try leb.writeULEB128(writer, @as(u32, 2)); // 2 fields: Language + processed-by // used for the Zig version var version_buf: [100]u8 = undefined; const version = try std.fmt.bufPrint(&version_buf, "{}", .{build_options.semver}); // language field { const language = "language"; try leb.writeULEB128(writer, @as(u32, @intCast(language.len))); try writer.writeAll(language); // field_value_count (TODO: Parse object files for producer sections to detect their language) try leb.writeULEB128(writer, @as(u32, 1)); // versioned name { try leb.writeULEB128(writer, @as(u32, 3)); // len of "Zig" try writer.writeAll("Zig"); try leb.writeULEB128(writer, @as(u32, @intCast(version.len))); try writer.writeAll(version); } } // processed-by field { const processed_by = "processed-by"; try leb.writeULEB128(writer, @as(u32, @intCast(processed_by.len))); try writer.writeAll(processed_by); // field_value_count (TODO: Parse object files for producer sections to detect other used tools) try leb.writeULEB128(writer, @as(u32, 1)); // versioned name { try leb.writeULEB128(writer, @as(u32, 3)); // len of "Zig" try writer.writeAll("Zig"); try leb.writeULEB128(writer, @as(u32, @intCast(version.len))); try writer.writeAll(version); } } try writeCustomSectionHeader( binary_bytes.items, header_offset, @as(u32, @intCast(binary_bytes.items.len - header_offset - 6)), ); } fn emitBuildIdSection(binary_bytes: *std.ArrayList(u8), build_id: []const u8) !void { const header_offset = try reserveCustomSectionHeader(binary_bytes); const writer = binary_bytes.writer(); const hdr_build_id = "build_id"; try leb.writeULEB128(writer, @as(u32, @intCast(hdr_build_id.len))); try writer.writeAll(hdr_build_id); try leb.writeULEB128(writer, @as(u32, 1)); try leb.writeULEB128(writer, @as(u32, @intCast(build_id.len))); try writer.writeAll(build_id); try writeCustomSectionHeader( binary_bytes.items, header_offset, @as(u32, @intCast(binary_bytes.items.len - header_offset - 6)), ); } fn 
emitFeaturesSection(binary_bytes: *std.ArrayList(u8), enabled_features: []const bool, features_count: u32) !void {
    const header_offset = try reserveCustomSectionHeader(binary_bytes);
    const writer = binary_bytes.writer();

    const target_features = "target_features";
    try leb.writeULEB128(writer, @as(u32, @intCast(target_features.len)));
    try writer.writeAll(target_features);

    try leb.writeULEB128(writer, features_count);
    // `enabled_features` is indexed by feature tag; only enabled ones are emitted.
    for (enabled_features, 0..) |enabled, feature_index| {
        if (enabled) {
            const feature: types.Feature = .{ .prefix = .used, .tag = @as(types.Feature.Tag, @enumFromInt(feature_index)) };
            try leb.writeULEB128(writer, @intFromEnum(feature.prefix));
            var buf: [100]u8 = undefined;
            const string = try std.fmt.bufPrint(&buf, "{}", .{feature.tag});
            try leb.writeULEB128(writer, @as(u32, @intCast(string.len)));
            try writer.writeAll(string);
        }
    }

    try writeCustomSectionHeader(
        binary_bytes.items,
        header_offset,
        @as(u32, @intCast(binary_bytes.items.len - header_offset - 6)),
    );
}

/// Emits the "name" custom section with subsections for functions, globals
/// and data segments, so tools can display symbol names.
fn emitNameSection(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), arena: std.mem.Allocator) !void {
    const comp = wasm.base.comp;
    const import_memory = comp.config.import_memory;
    const Name = struct {
        index: u32,
        name: []const u8,

        fn lessThan(context: void, lhs: @This(), rhs: @This()) bool {
            _ = context;
            return lhs.index < rhs.index;
        }
    };

    // we must de-duplicate symbols that point to the same function
    var funcs = std.AutoArrayHashMap(u32, Name).init(arena);
    try funcs.ensureUnusedCapacity(wasm.functions.count() + wasm.imported_functions_count);
    var globals = try std.ArrayList(Name).initCapacity(arena, wasm.wasm_globals.items.len + wasm.imported_globals_count);
    var segments = try std.ArrayList(Name).initCapacity(arena, wasm.data_segments.count());

    for (wasm.resolved_symbols.keys()) |sym_loc| {
        const symbol = sym_loc.getSymbol(wasm).*;
        if (symbol.isDead()) {
            continue;
        }
        const name = sym_loc.getName(wasm);
        switch (symbol.tag) {
            .function => {
                const gop = funcs.getOrPutAssumeCapacity(symbol.index);
                if (!gop.found_existing) {
                    gop.value_ptr.* = .{ .index = symbol.index, .name = name };
                }
            },
            .global => globals.appendAssumeCapacity(.{ .index = symbol.index, .name = name }),
            else => {},
        }
    }
    // data segments are already 'ordered'
    var data_segment_index: u32 = 0;
    for (wasm.data_segments.keys()) |key| {
        // bss section is not emitted when this condition holds true, so we also
        // do not output a name for it.
        if (!import_memory and std.mem.eql(u8, key, ".bss")) continue;
        segments.appendAssumeCapacity(.{ .index = data_segment_index, .name = key });
        data_segment_index += 1;
    }

    // name subsection entries must be sorted by index
    mem.sort(Name, funcs.values(), {}, Name.lessThan);
    mem.sort(Name, globals.items, {}, Name.lessThan);

    const header_offset = try reserveCustomSectionHeader(binary_bytes);
    const writer = binary_bytes.writer();
    try leb.writeULEB128(writer, @as(u32, @intCast("name".len)));
    try writer.writeAll("name");

    try wasm.emitNameSubsection(.function, funcs.values(), writer);
    try wasm.emitNameSubsection(.global, globals.items, writer);
    try wasm.emitNameSubsection(.data_segment, segments.items, writer);

    try writeCustomSectionHeader(
        binary_bytes.items,
        header_offset,
        @as(u32, @intCast(binary_bytes.items.len - header_offset - 6)),
    );
}

/// Emits a single subsection of the "name" section. `names` must be a slice
/// of `{ index, name }` entries.
fn emitNameSubsection(wasm: *Wasm, section_id: std.wasm.NameSubsection, names: anytype, writer: anytype) !void {
    const gpa = wasm.base.comp.gpa;
    // We must emit subsection size, so first write to a temporary list
    var section_list = std.ArrayList(u8).init(gpa);
    defer section_list.deinit();
    const sub_writer = section_list.writer();

    try leb.writeULEB128(sub_writer, @as(u32, @intCast(names.len)));
    for (names) |name| {
        log.debug("Emit symbol '{s}' type({s})", .{ name.name, @tagName(section_id) });
        try leb.writeULEB128(sub_writer, name.index);
        try leb.writeULEB128(sub_writer, @as(u32, @intCast(name.name.len)));
        try sub_writer.writeAll(name.name);
    }

    // From now, write to the actual writer
    try leb.writeULEB128(writer, @intFromEnum(section_id));
    try leb.writeULEB128(writer, @as(u32,
@intCast(section_list.items.len)));
    try writer.writeAll(section_list.items);
}

/// Writes a wasm limits structure: flags byte, minimum, and (only when the
/// max flag is set) maximum.
fn emitLimits(writer: anytype, limits: std.wasm.Limits) !void {
    try writer.writeByte(limits.flags);
    try leb.writeULEB128(writer, limits.min);
    if (limits.hasFlag(.WASM_LIMITS_FLAG_HAS_MAX)) {
        try leb.writeULEB128(writer, limits.max);
    }
}

/// Emits a constant init expression followed by the `end` opcode.
fn emitInit(writer: anytype, init_expr: std.wasm.InitExpression) !void {
    switch (init_expr) {
        .i32_const => |val| {
            try writer.writeByte(std.wasm.opcode(.i32_const));
            try leb.writeILEB128(writer, val);
        },
        .i64_const => |val| {
            try writer.writeByte(std.wasm.opcode(.i64_const));
            try leb.writeILEB128(writer, val);
        },
        .f32_const => |val| {
            try writer.writeByte(std.wasm.opcode(.f32_const));
            // floats are written as raw little-endian bits, not leb-encoded
            try writer.writeInt(u32, @bitCast(val), .little);
        },
        .f64_const => |val| {
            try writer.writeByte(std.wasm.opcode(.f64_const));
            try writer.writeInt(u64, @bitCast(val), .little);
        },
        .global_get => |val| {
            try writer.writeByte(std.wasm.opcode(.global_get));
            try leb.writeULEB128(writer, val);
        },
    }
    try writer.writeByte(std.wasm.opcode(.end));
}

/// Emits one entry of the import section: module name, import name,
/// kind byte, then the kind-specific description.
fn emitImport(wasm: *Wasm, writer: anytype, import: types.Import) !void {
    const module_name = wasm.string_table.get(import.module_name);
    try leb.writeULEB128(writer, @as(u32, @intCast(module_name.len)));
    try writer.writeAll(module_name);

    const name = wasm.string_table.get(import.name);
    try leb.writeULEB128(writer, @as(u32, @intCast(name.len)));
    try writer.writeAll(name);

    try writer.writeByte(@intFromEnum(import.kind));
    switch (import.kind) {
        .function => |type_index| try leb.writeULEB128(writer, type_index),
        .global => |global_type| {
            try leb.writeULEB128(writer, std.wasm.valtype(global_type.valtype));
            try writer.writeByte(@intFromBool(global_type.mutable));
        },
        .table => |table| {
            try leb.writeULEB128(writer, std.wasm.reftype(table.reftype));
            try emitLimits(writer, table.limits);
        },
        .memory => |limits| {
            try emitLimits(writer, limits);
        },
    }
}

/// Links the compilation's objects into the final binary by invoking LLD
/// (as a child process when possible), with caching via the cache manifest.
fn linkWithLLD(wasm: *Wasm, arena: Allocator, prog_node: *std.Progress.Node) !void {
    const tracy =
trace(@src());
    defer tracy.end();

    const comp = wasm.base.comp;
    const shared_memory = comp.config.shared_memory;
    const export_memory = comp.config.export_memory;
    const import_memory = comp.config.import_memory;
    const target = comp.root_mod.resolved_target.result;

    const gpa = comp.gpa;

    const directory = wasm.base.emit.directory; // Just an alias to make it shorter to type.
    const full_out_path = try directory.join(arena, &[_][]const u8{wasm.base.emit.sub_path});

    // If there is no Zig code to compile, then we should skip flushing the output file because it
    // will not be part of the linker line anyway.
    const module_obj_path: ?[]const u8 = if (comp.module != null) blk: {
        try wasm.flushModule(arena, prog_node);

        if (fs.path.dirname(full_out_path)) |dirname| {
            break :blk try fs.path.join(arena, &.{ dirname, wasm.base.zcu_object_sub_path.? });
        } else {
            break :blk wasm.base.zcu_object_sub_path.?;
        }
    } else null;

    var sub_prog_node = prog_node.start("LLD Link", 0);
    sub_prog_node.activate();
    sub_prog_node.context.refresh();
    defer sub_prog_node.end();

    const is_obj = comp.config.output_mode == .Obj;
    const compiler_rt_path: ?[]const u8 = blk: {
        if (comp.compiler_rt_lib) |lib| break :blk lib.full_object_path;
        if (comp.compiler_rt_obj) |obj| break :blk obj.full_object_path;
        break :blk null;
    };

    const id_symlink_basename = "lld.id";

    var man: Cache.Manifest = undefined;
    defer if (!wasm.base.disable_lld_caching) man.deinit();

    var digest: [Cache.hex_digest_len]u8 = undefined;

    if (!wasm.base.disable_lld_caching) {
        man = comp.cache_parent.obtain();

        // We are about to obtain this lock, so here we give other processes a chance first.
        wasm.base.releaseLock();

        comptime assert(Compilation.link_hash_implementation_version == 13);

        // Hash every input that affects the linker invocation so a stale
        // output is never reused.
        for (comp.objects) |obj| {
            _ = try man.addFile(obj.path, null);
            man.hash.add(obj.must_link);
        }
        for (comp.c_object_table.keys()) |key| {
            _ = try man.addFile(key.status.success.object_path, null);
        }
        try man.addOptionalFile(module_obj_path);
        try man.addOptionalFile(compiler_rt_path);
        man.hash.addOptionalBytes(wasm.entry_name);
        man.hash.add(wasm.base.stack_size);
        man.hash.add(wasm.base.build_id);
        man.hash.add(import_memory);
        man.hash.add(export_memory);
        man.hash.add(wasm.import_table);
        man.hash.add(wasm.export_table);
        man.hash.addOptional(wasm.initial_memory);
        man.hash.addOptional(wasm.max_memory);
        man.hash.add(shared_memory);
        man.hash.addOptional(wasm.global_base);
        man.hash.addListOfBytes(wasm.export_symbol_names);
        // strip does not need to go into the linker hash because it is part of the hash namespace

        // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock.
        _ = try man.hit();
        digest = man.final();

        var prev_digest_buf: [digest.len]u8 = undefined;
        const prev_digest: []u8 = Cache.readSmallFile(
            directory.handle,
            id_symlink_basename,
            &prev_digest_buf,
        ) catch |err| blk: {
            log.debug("WASM LLD new_digest={s} error: {s}", .{ std.fmt.fmtSliceHexLower(&digest), @errorName(err) });
            // Handle this as a cache miss.
            break :blk prev_digest_buf[0..0];
        };
        if (mem.eql(u8, prev_digest, &digest)) {
            log.debug("WASM LLD digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)});
            // Hot diggity dog! The output binary is already there.
            wasm.base.lock = man.toOwnedLock();
            return;
        }
        log.debug("WASM LLD prev_digest={s} new_digest={s}", .{ std.fmt.fmtSliceHexLower(prev_digest), std.fmt.fmtSliceHexLower(&digest) });

        // We are about to change the output file to be different, so we invalidate the build hash now.
        directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) {
            error.FileNotFound => {},
            else => |e| return e,
        };
    }

    if (is_obj) {
        // LLD's WASM driver does not support the equivalent of `-r` so we do a simple file copy
        // here. TODO: think carefully about how we can avoid this redundant operation when doing
        // build-obj. See also the corresponding TODO in linkAsArchive.
        const the_object_path = blk: {
            if (comp.objects.len != 0) break :blk comp.objects[0].path;

            if (comp.c_object_table.count() != 0) break :blk comp.c_object_table.keys()[0].status.success.object_path;

            if (module_obj_path) |p| break :blk p;

            // TODO I think this is unreachable. Audit this situation when solving the above TODO
            // regarding eliding redundant object -> object transformations.
            return error.NoObjectsToLink;
        };
        // This can happen when using --enable-cache and using the stage1 backend. In this case
        // we can skip the file copy.
        if (!mem.eql(u8, the_object_path, full_out_path)) {
            try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{});
        }
    } else {
        // Create an LLD command line and invoke it.
        var argv = std.ArrayList([]const u8).init(gpa);
        defer argv.deinit();
        // We will invoke ourselves as a child process to gain access to LLD.
        // This is necessary because LLD does not behave properly as a library -
        // it calls exit() and does not reset all global data between invocations.
        const linker_command = "wasm-ld";
        try argv.appendSlice(&[_][]const u8{ comp.self_exe_path.?, linker_command });
        try argv.append("--error-limit=0");

        if (comp.config.lto) {
            switch (comp.root_mod.optimize_mode) {
                .Debug => {},
                .ReleaseSmall => try argv.append("-O2"),
                .ReleaseFast, .ReleaseSafe => try argv.append("-O3"),
            }
        }

        if (import_memory) {
            try argv.append("--import-memory");
        }
        if (export_memory) {
            try argv.append("--export-memory");
        }

        // import-table and export-table are mutually exclusive.
        if (wasm.import_table) {
            assert(!wasm.export_table);
            try argv.append("--import-table");
        }
        if (wasm.export_table) {
            assert(!wasm.import_table);
            try argv.append("--export-table");
        }

        // For wasm-ld we only need to specify '--no-gc-sections' when the user explicitly
        // specified it as garbage collection is enabled by default.
        if (!wasm.base.gc_sections) {
            try argv.append("--no-gc-sections");
        }

        if (comp.config.debug_format == .strip) {
            try argv.append("-s");
        }

        if (wasm.initial_memory) |initial_memory| {
            const arg = try std.fmt.allocPrint(arena, "--initial-memory={d}", .{initial_memory});
            try argv.append(arg);
        }

        if (wasm.max_memory) |max_memory| {
            const arg = try std.fmt.allocPrint(arena, "--max-memory={d}", .{max_memory});
            try argv.append(arg);
        }

        if (shared_memory) {
            try argv.append("--shared-memory");
        }

        if (wasm.global_base) |global_base| {
            const arg = try std.fmt.allocPrint(arena, "--global-base={d}", .{global_base});
            try argv.append(arg);
        } else {
            // We prepend it by default, so when a stack overflow happens the runtime will trap correctly,
            // rather than silently overwrite all global declarations. See https://github.com/ziglang/zig/issues/4496
            //
            // The user can overwrite this behavior by setting the global-base
            try argv.append("--stack-first");
        }

        // Users are allowed to specify which symbols they want to export to the wasm host.
        for (wasm.export_symbol_names) |symbol_name| {
            const arg = try std.fmt.allocPrint(arena, "--export={s}", .{symbol_name});
            try argv.append(arg);
        }

        if (comp.config.rdynamic) {
            try argv.append("--export-dynamic");
        }

        if (wasm.entry_name) |entry_name| {
            try argv.appendSlice(&.{ "--entry", entry_name });
        } else {
            try argv.append("--no-entry");
        }

        try argv.appendSlice(&.{
            "-z",
            try std.fmt.allocPrint(arena, "stack-size={d}", .{wasm.base.stack_size}),
        });

        if (wasm.import_symbols) {
            try argv.append("--allow-undefined");
        }

        if (comp.config.output_mode == .Lib and comp.config.link_mode == .dynamic) {
            try argv.append("--shared");
        }
        if (comp.config.pie) {
            try argv.append("--pie");
        }

        // XXX - TODO: add when wasm-ld supports --build-id.
        // if (wasm.base.build_id) {
        //     try argv.append("--build-id=tree");
        // }

        try argv.appendSlice(&.{ "-o", full_out_path });

        if (target.cpu.arch == .wasm64) {
            try argv.append("-mwasm64");
        }

        if (target.os.tag == .wasi) {
            const is_exe_or_dyn_lib = comp.config.output_mode == .Exe or
                (comp.config.output_mode == .Lib and comp.config.link_mode == .dynamic);
            if (is_exe_or_dyn_lib) {
                for (comp.wasi_emulated_libs) |crt_file| {
                    try argv.append(try comp.get_libc_crt_file(
                        arena,
                        wasi_libc.emulatedLibCRFileLibName(crt_file),
                    ));
                }

                if (comp.config.link_libc) {
                    try argv.append(try comp.get_libc_crt_file(
                        arena,
                        wasi_libc.execModelCrtFileFullName(comp.config.wasi_exec_model),
                    ));
                    try argv.append(try comp.get_libc_crt_file(arena, "libc.a"));
                }

                if (comp.config.link_libcpp) {
                    try argv.append(comp.libcxx_static_lib.?.full_object_path);
                    try argv.append(comp.libcxxabi_static_lib.?.full_object_path);
                }
            }
        }

        // Positional arguments to the linker such as object files.
        var whole_archive = false;
        for (comp.objects) |obj| {
            if (obj.must_link and !whole_archive) {
                try argv.append("-whole-archive");
                whole_archive = true;
            } else if (!obj.must_link and whole_archive) {
                try argv.append("-no-whole-archive");
                whole_archive = false;
            }
            try argv.append(obj.path);
        }
        if (whole_archive) {
            try argv.append("-no-whole-archive");
            whole_archive = false;
        }

        for (comp.c_object_table.keys()) |key| {
            try argv.append(key.status.success.object_path);
        }
        if (module_obj_path) |p| {
            try argv.append(p);
        }

        if (comp.config.output_mode != .Obj and
            !comp.skip_linker_dependencies and
            !comp.config.link_libc)
        {
            try argv.append(comp.libc_static_lib.?.full_object_path);
        }

        if (compiler_rt_path) |p| {
            try argv.append(p);
        }

        if (comp.verbose_link) {
            // Skip over our own name so that the LLD linker name is the first argv item.
            Compilation.dump_argv(argv.items[1..]);
        }

        if (std.process.can_spawn) {
            // If possible, we run LLD as a child process because it does not always
            // behave properly as a library, unfortunately.
            // https://github.com/ziglang/zig/issues/3825
            var child = std.ChildProcess.init(argv.items, arena);
            if (comp.clang_passthrough_mode) {
                child.stdin_behavior = .Inherit;
                child.stdout_behavior = .Inherit;
                child.stderr_behavior = .Inherit;

                const term = child.spawnAndWait() catch |err| {
                    log.err("unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) });
                    return error.UnableToSpawnWasm;
                };
                switch (term) {
                    .Exited => |code| {
                        if (code != 0) {
                            std.process.exit(code);
                        }
                    },
                    else => std.process.abort(),
                }
            } else {
                child.stdin_behavior = .Ignore;
                child.stdout_behavior = .Ignore;
                child.stderr_behavior = .Pipe;

                try child.spawn();

                const stderr = try child.stderr.?.reader().readAllAlloc(arena, std.math.maxInt(usize));

                const term = child.wait() catch |err| {
                    log.err("unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) });
                    return error.UnableToSpawnWasm;
                };

                switch (term) {
                    .Exited => |code| {
                        if (code != 0) {
                            comp.lockAndParseLldStderr(linker_command, stderr);
                            return error.LLDReportedFailure;
                        }
                    },
                    else => {
                        log.err("{s} terminated with stderr:\n{s}", .{ argv.items[0], stderr });
                        return error.LLDCrashed;
                    },
                }

                if (stderr.len != 0) {
                    log.warn("unexpected LLD stderr:\n{s}", .{stderr});
                }
            }
        } else {
            const exit_code = try lldMain(arena, argv.items, false);
            if (exit_code != 0) {
                if (comp.clang_passthrough_mode) {
                    std.process.exit(exit_code);
                } else {
                    return error.LLDReportedFailure;
                }
            }
        }

        // Give +x to the .wasm file if it is an executable and the OS is WASI.
        // Some systems may be configured to execute such binaries directly. Even if that
        // is not the case, it means we will get "exec format error" when trying to run
        // it, and then can react to that in the same way as trying to run an ELF file
        // from a foreign CPU architecture.
        if (fs.has_executable_bit and target.os.tag == .wasi and comp.config.output_mode == .Exe) {
            // TODO: what's our strategy for reporting linker errors from this function?
// report a nice error here with the file path if it fails instead of
            // just returning the error code.
            // chmod does not interact with umask, so we use a conservative -rwxr--r-- here.
            std.os.fchmodat(fs.cwd().fd, full_out_path, 0o744, 0) catch |err| switch (err) {
                error.OperationNotSupported => unreachable, // Not a symlink.
                else => |e| return e,
            };
        }
    }

    if (!wasm.base.disable_lld_caching) {
        // Update the file with the digest. If it fails we can continue; it only
        // means that the next invocation will have an unnecessary cache miss.
        Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| {
            log.warn("failed to save linking hash digest symlink: {s}", .{@errorName(err)});
        };
        // Again failure here only means an unnecessary cache miss.
        man.writeManifest() catch |err| {
            log.warn("failed to write cache manifest when linking: {s}", .{@errorName(err)});
        };
        // We hang on to this lock so that the output file path can be used without
        // other processes clobbering it.
        wasm.base.lock = man.toOwnedLock();
    }
}

/// Reserves zeroed space for a standard vector section header
/// (id + size + count) and returns the offset where the header
/// must later be patched in.
fn reserveVecSectionHeader(bytes: *std.ArrayList(u8)) !u32 {
    // section id + fixed leb contents size + fixed leb vector length
    const header_size = 1 + 5 + 5;
    const offset = @as(u32, @intCast(bytes.items.len));
    try bytes.appendSlice(&[_]u8{0} ** header_size);
    return offset;
}

/// Reserves zeroed space for a custom section header (id + size) and returns
/// the offset where the header must later be patched in.
fn reserveCustomSectionHeader(bytes: *std.ArrayList(u8)) !u32 {
    // unlike regular section, we don't emit the count
    const header_size = 1 + 5;
    const offset = @as(u32, @intCast(bytes.items.len));
    try bytes.appendSlice(&[_]u8{0} ** header_size);
    return offset;
}

/// Patches a previously reserved vector section header in place with the
/// final section id, byte size, and element count.
fn writeVecSectionHeader(buffer: []u8, offset: u32, section: std.wasm.Section, size: u32, items: u32) !void {
    var buf: [1 + 5 + 5]u8 = undefined;
    buf[0] = @intFromEnum(section);
    leb.writeUnsignedFixed(5, buf[1..6], size);
    leb.writeUnsignedFixed(5, buf[6..], items);
    buffer[offset..][0..buf.len].* = buf;
}

/// Patches a previously reserved custom section header in place.
fn writeCustomSectionHeader(buffer: []u8, offset: u32, size: u32) !void {
    var buf: [1 + 5]u8 = undefined;
    buf[0] = 0; // 0 = 'custom' section
    leb.writeUnsignedFixed(5, buf[1..6], size);
    buffer[offset..][0..buf.len].* = buf;
}

/// Emits the "linking" custom section (metadata version 2) containing the
/// symbol table and segment info. `symbol_table` is filled with the mapping
/// from symbol location to its index in the emitted table.
fn emitLinkSection(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), symbol_table: *std.AutoArrayHashMap(SymbolLoc, u32)) !void {
    const offset = try reserveCustomSectionHeader(binary_bytes);
    const writer = binary_bytes.writer();
    // emit "linking" custom section name
    const section_name = "linking";
    try leb.writeULEB128(writer, section_name.len);
    try writer.writeAll(section_name);

    // meta data version, which is currently '2'
    try leb.writeULEB128(writer, @as(u32, 2));

    // For each subsection type (found in types.Subsection) we can emit a section.
    // Currently, we only support emitting segment info and the symbol table.
    try wasm.emitSymbolTable(binary_bytes, symbol_table);
    try wasm.emitSegmentInfo(binary_bytes);

    const size: u32 = @intCast(binary_bytes.items.len - offset - 6);
    try writeCustomSectionHeader(binary_bytes.items, offset, size);
}

/// Emits the WASM_SYMBOL_TABLE subsection of the "linking" section. Dead
/// symbols are skipped; every emitted symbol is recorded in `symbol_table`
/// with its final table index.
fn emitSymbolTable(wasm: *Wasm, binary_bytes: *std.ArrayList(u8), symbol_table: *std.AutoArrayHashMap(SymbolLoc, u32)) !void {
    const writer = binary_bytes.writer();

    try leb.writeULEB128(writer, @intFromEnum(types.SubsectionType.WASM_SYMBOL_TABLE));
    const table_offset = binary_bytes.items.len;

    var symbol_count: u32 = 0;
    for (wasm.resolved_symbols.keys()) |sym_loc| {
        const symbol = sym_loc.getSymbol(wasm).*;
        if (symbol.tag == .dead) continue; // Do not emit dead symbols
        try symbol_table.putNoClobber(sym_loc, symbol_count);
        symbol_count += 1;
        log.debug("Emit symbol: {}", .{symbol});
        try leb.writeULEB128(writer, @intFromEnum(symbol.tag));
        try leb.writeULEB128(writer, symbol.flags);

        const sym_name = sym_loc.getName(wasm);
        switch (symbol.tag) {
            .data => {
                try leb.writeULEB128(writer, @as(u32, @intCast(sym_name.len)));
                try writer.writeAll(sym_name);

                // defined data symbols also carry segment index, offset and size
                if (symbol.isDefined()) {
                    try leb.writeULEB128(writer, symbol.index);
                    const atom_index = wasm.symbol_atom.get(sym_loc).?;
                    const atom = wasm.getAtom(atom_index);
                    try leb.writeULEB128(writer,
@as(u32, atom.offset)); try leb.writeULEB128(writer, @as(u32, atom.size)); } }, .section => { try leb.writeULEB128(writer, symbol.index); }, else => { try leb.writeULEB128(writer, symbol.index); if (symbol.isDefined()) { try leb.writeULEB128(writer, @as(u32, @intCast(sym_name.len))); try writer.writeAll(sym_name); } }, } } var buf: [10]u8 = undefined; leb.writeUnsignedFixed(5, buf[0..5], @intCast(binary_bytes.items.len - table_offset + 5)); leb.writeUnsignedFixed(5, buf[5..], symbol_count); try binary_bytes.insertSlice(table_offset, &buf); } fn emitSegmentInfo(wasm: *Wasm, binary_bytes: *std.ArrayList(u8)) !void { const writer = binary_bytes.writer(); try leb.writeULEB128(writer, @intFromEnum(types.SubsectionType.WASM_SEGMENT_INFO)); const segment_offset = binary_bytes.items.len; try leb.writeULEB128(writer, @as(u32, @intCast(wasm.segment_info.count()))); for (wasm.segment_info.values()) |segment_info| { log.debug("Emit segment: {s} align({d}) flags({b})", .{ segment_info.name, segment_info.alignment, segment_info.flags, }); try leb.writeULEB128(writer, @as(u32, @intCast(segment_info.name.len))); try writer.writeAll(segment_info.name); try leb.writeULEB128(writer, segment_info.alignment.toLog2Units()); try leb.writeULEB128(writer, segment_info.flags); } var buf: [5]u8 = undefined; leb.writeUnsignedFixed(5, &buf, @as(u32, @intCast(binary_bytes.items.len - segment_offset))); try binary_bytes.insertSlice(segment_offset, &buf); } pub fn getULEB128Size(uint_value: anytype) u32 { const T = @TypeOf(uint_value); const U = if (@typeInfo(T).Int.bits < 8) u8 else T; var value = @as(U, @intCast(uint_value)); var size: u32 = 0; while (value != 0) : (size += 1) { value >>= 7; } return size; } /// For each relocatable section, emits a custom "relocation." 
section fn emitCodeRelocations( wasm: *Wasm, binary_bytes: *std.ArrayList(u8), section_index: u32, symbol_table: std.AutoArrayHashMap(SymbolLoc, u32), ) !void { const code_index = wasm.code_section_index orelse return; const writer = binary_bytes.writer(); const header_offset = try reserveCustomSectionHeader(binary_bytes); // write custom section information const name = "reloc.CODE"; try leb.writeULEB128(writer, @as(u32, @intCast(name.len))); try writer.writeAll(name); try leb.writeULEB128(writer, section_index); const reloc_start = binary_bytes.items.len; var count: u32 = 0; var atom: *Atom = wasm.getAtomPtr(wasm.atoms.get(code_index).?); // for each atom, we calculate the uleb size and append that var size_offset: u32 = 5; // account for code section size leb128 while (true) { size_offset += getULEB128Size(atom.size); for (atom.relocs.items) |relocation| { count += 1; const sym_loc: SymbolLoc = .{ .file = atom.file, .index = @enumFromInt(relocation.index) }; const symbol_index = symbol_table.get(sym_loc).?; try leb.writeULEB128(writer, @intFromEnum(relocation.relocation_type)); const offset = atom.offset + relocation.offset + size_offset; try leb.writeULEB128(writer, offset); try leb.writeULEB128(writer, symbol_index); if (relocation.relocation_type.addendIsPresent()) { try leb.writeILEB128(writer, relocation.addend); } log.debug("Emit relocation: {}", .{relocation}); } if (atom.prev == .null) break; atom = wasm.getAtomPtr(atom.prev); } if (count == 0) return; var buf: [5]u8 = undefined; leb.writeUnsignedFixed(5, &buf, count); try binary_bytes.insertSlice(reloc_start, &buf); const size: u32 = @intCast(binary_bytes.items.len - header_offset - 6); try writeCustomSectionHeader(binary_bytes.items, header_offset, size); } fn emitDataRelocations( wasm: *Wasm, binary_bytes: *std.ArrayList(u8), section_index: u32, symbol_table: std.AutoArrayHashMap(SymbolLoc, u32), ) !void { if (wasm.data_segments.count() == 0) return; const writer = binary_bytes.writer(); const 
header_offset = try reserveCustomSectionHeader(binary_bytes); // write custom section information const name = "reloc.DATA"; try leb.writeULEB128(writer, @as(u32, @intCast(name.len))); try writer.writeAll(name); try leb.writeULEB128(writer, section_index); const reloc_start = binary_bytes.items.len; var count: u32 = 0; // for each atom, we calculate the uleb size and append that var size_offset: u32 = 5; // account for code section size leb128 for (wasm.data_segments.values()) |segment_index| { var atom: *Atom = wasm.getAtomPtr(wasm.atoms.get(segment_index).?); while (true) { size_offset += getULEB128Size(atom.size); for (atom.relocs.items) |relocation| { count += 1; const sym_loc: SymbolLoc = .{ .file = atom.file, .index = @enumFromInt(relocation.index) }; const symbol_index = symbol_table.get(sym_loc).?; try leb.writeULEB128(writer, @intFromEnum(relocation.relocation_type)); const offset = atom.offset + relocation.offset + size_offset; try leb.writeULEB128(writer, offset); try leb.writeULEB128(writer, symbol_index); if (relocation.relocation_type.addendIsPresent()) { try leb.writeILEB128(writer, relocation.addend); } log.debug("Emit relocation: {}", .{relocation}); } if (atom.prev == .null) break; atom = wasm.getAtomPtr(atom.prev); } } if (count == 0) return; var buf: [5]u8 = undefined; leb.writeUnsignedFixed(5, &buf, count); try binary_bytes.insertSlice(reloc_start, &buf); const size = @as(u32, @intCast(binary_bytes.items.len - header_offset - 6)); try writeCustomSectionHeader(binary_bytes.items, header_offset, size); } fn hasPassiveInitializationSegments(wasm: *const Wasm) bool { const comp = wasm.base.comp; const import_memory = comp.config.import_memory; var it = wasm.data_segments.iterator(); while (it.next()) |entry| { const segment: Segment = wasm.segments.items[entry.value_ptr.*]; if (segment.needsPassiveInitialization(import_memory, entry.key_ptr.*)) { return true; } } return false; } /// Searches for a matching function signature. 
When no matching signature is found, /// a new entry will be made. The value returned is the index of the type within `wasm.func_types`. pub fn putOrGetFuncType(wasm: *Wasm, func_type: std.wasm.Type) !u32 { if (wasm.getTypeIndex(func_type)) |index| { return index; } // functype does not exist. const gpa = wasm.base.comp.gpa; const index: u32 = @intCast(wasm.func_types.items.len); const params = try gpa.dupe(std.wasm.Valtype, func_type.params); errdefer gpa.free(params); const returns = try gpa.dupe(std.wasm.Valtype, func_type.returns); errdefer gpa.free(returns); try wasm.func_types.append(gpa, .{ .params = params, .returns = returns, }); return index; } /// For the given `decl_index`, stores the corresponding type representing the function signature. /// Asserts declaration has an associated `Atom`. /// Returns the index into the list of types. pub fn storeDeclType(wasm: *Wasm, decl_index: InternPool.DeclIndex, func_type: std.wasm.Type) !u32 { return wasm.zigObjectPtr().?.storeDeclType(wasm.base.comp.gpa, decl_index, func_type); } /// Returns the symbol index of the error name table. /// /// When the symbol does not yet exist, it will create a new one instead. pub fn getErrorTableSymbol(wasm_file: *Wasm) !u32 { const sym_index = try wasm_file.zigObjectPtr().?.getErrorTableSymbol(wasm_file); return @intFromEnum(sym_index); } /// For a given `InternPool.DeclIndex` returns its corresponding `Atom.Index`. /// When the index was not found, a new `Atom` will be created, and its index will be returned. /// The newly created Atom is empty with default fields as specified by `Atom.empty`. pub fn getOrCreateAtomForDecl(wasm_file: *Wasm, decl_index: InternPool.DeclIndex) !Atom.Index { return wasm_file.zigObjectPtr().?.getOrCreateAtomForDecl(wasm_file, decl_index); } /// Verifies all resolved symbols and checks whether itself needs to be marked alive, /// as well as any of its references. 
fn markReferences(wasm: *Wasm) !void { const tracy = trace(@src()); defer tracy.end(); const do_garbage_collect = wasm.base.gc_sections; const comp = wasm.base.comp; for (wasm.resolved_symbols.keys()) |sym_loc| { const sym = sym_loc.getSymbol(wasm); if (sym.isExported(comp.config.rdynamic) or sym.isNoStrip() or !do_garbage_collect) { try wasm.mark(sym_loc); continue; } // Debug sections may require to be parsed and marked when it contains // relocations to alive symbols. if (sym.tag == .section and comp.config.debug_format != .strip) { const obj_file = wasm.file(sym_loc.file) orelse continue; // Incremental debug info is done independently _ = try obj_file.parseSymbolIntoAtom(wasm, sym_loc.index); sym.mark(); } } } /// Marks a symbol as 'alive' recursively so itself and any references it contains to /// other symbols will not be omit from the binary. fn mark(wasm: *Wasm, loc: SymbolLoc) !void { const symbol = loc.getSymbol(wasm); if (symbol.isAlive()) { // Symbol is already marked alive, including its references. // This means we can skip it so we don't end up marking the same symbols // multiple times. return; } symbol.mark(); gc_log.debug("Marked symbol '{s}'", .{loc.getName(wasm)}); if (symbol.isUndefined()) { // undefined symbols do not have an associated `Atom` and therefore also // do not contain relocations. return; } const atom_index = if (wasm.file(loc.file)) |obj_file| try obj_file.parseSymbolIntoAtom(wasm, loc.index) else wasm.symbol_atom.get(loc) orelse return; const atom = wasm.getAtom(atom_index); for (atom.relocs.items) |reloc| { const target_loc: SymbolLoc = .{ .index = @enumFromInt(reloc.index), .file = loc.file }; try wasm.mark(target_loc.finalLoc(wasm)); } } fn defaultEntrySymbolName(wasi_exec_model: std.builtin.WasiExecModel) []const u8 { return switch (wasi_exec_model) { .reactor => "_initialize", .command => "_start", }; } const ErrorWithNotes = struct { /// Allocated index in comp.link_errors array. index: usize, /// Next available note slot. 
note_slot: usize = 0, pub fn addMsg( err: ErrorWithNotes, wasm_file: *const Wasm, comptime format: []const u8, args: anytype, ) error{OutOfMemory}!void { const comp = wasm_file.base.comp; const gpa = comp.gpa; const err_msg = &comp.link_errors.items[err.index]; err_msg.msg = try std.fmt.allocPrint(gpa, format, args); } pub fn addNote( err: *ErrorWithNotes, wasm_file: *const Wasm, comptime format: []const u8, args: anytype, ) error{OutOfMemory}!void { const comp = wasm_file.base.comp; const gpa = comp.gpa; const err_msg = &comp.link_errors.items[err.index]; err_msg.notes[err.note_slot] = .{ .msg = try std.fmt.allocPrint(gpa, format, args) }; err.note_slot += 1; } }; pub fn addErrorWithNotes(wasm: *const Wasm, note_count: usize) error{OutOfMemory}!ErrorWithNotes { const comp = wasm.base.comp; const gpa = comp.gpa; try comp.link_errors.ensureUnusedCapacity(gpa, 1); return wasm.addErrorWithNotesAssumeCapacity(note_count); } pub fn addErrorWithoutNotes(wasm: *const Wasm, comptime fmt: []const u8, args: anytype) !void { const err = try wasm.addErrorWithNotes(0); try err.addMsg(wasm, fmt, args); } fn addErrorWithNotesAssumeCapacity(wasm: *const Wasm, note_count: usize) error{OutOfMemory}!ErrorWithNotes { const comp = wasm.base.comp; const gpa = comp.gpa; const index = comp.link_errors.items.len; const err = comp.link_errors.addOneAssumeCapacity(); err.* = .{ .msg = undefined, .notes = try gpa.alloc(link.File.ErrorMsg, note_count) }; return .{ .index = index }; }