| author | Luuk de Gram <luuk@degram.dev> | 2022-12-13 19:19:05 +0100 |
|---|---|---|
| committer | Luuk de Gram <luuk@degram.dev> | 2022-12-13 19:19:05 +0100 |
| commit | 41199bba4bae4783634ee52ecd8d111f8f82fd4c (patch) | |
| tree | bf9230dc76c78a07a0629113a91a02e9d4571bde /src | |
| parent | 4832677c3bce61725c67306c4683921296abdff9 (diff) | |
| download | zig-41199bba4bae4783634ee52ecd8d111f8f82fd4c.tar.gz zig-41199bba4bae4783634ee52ecd8d111f8f82fd4c.zip | |
wasm-linker: Create separate path for one-shot
When invoking the self-hosted linker with `-fno-LLD` while using the
LLVM backend, or when invoking it as a linker directly, we create a
separate path. This path links the object file generated by LLVM and
the supplied object files just once, which simplifies the
implementation by separating one-shot linking from incremental linking.
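
In short, `flush` now selects one of three paths. The following is a condensed sketch of the new dispatch, simplified from the hunk in the diff below (not a verbatim copy of `src/link/Wasm.zig`):

```zig
pub fn flush(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void {
    if (build_options.have_llvm and wasm.base.options.use_lld) {
        // LLVM backend with LLD: hand the LLVM-emitted object file to LLD.
        return wasm.linkWithLLD(comp, prog_node);
    } else if (build_options.have_llvm and wasm.base.options.use_llvm and !wasm.base.options.use_lld) {
        // LLVM backend with `-fno-LLD`: the new one-shot self-hosted path.
        return wasm.linkWithZld(comp, prog_node);
    } else {
        // Self-hosted backend: incremental linking via flushModule.
        return wasm.flushModule(comp, prog_node);
    }
}
```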
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/link/Wasm.zig | 537 |

1 file changed, 533 insertions(+), 4 deletions(-)
```diff
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 74a303b8c7..e5bce8bda6 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -311,13 +311,23 @@ pub const StringTable = struct {
 pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Options) !*Wasm {
     assert(options.target.ofmt == .wasm);
 
-    if (build_options.have_llvm and options.use_llvm) {
+    if (build_options.have_llvm and options.use_llvm and options.use_lld) {
         return createEmpty(allocator, options);
     }
 
     const wasm_bin = try createEmpty(allocator, options);
     errdefer wasm_bin.base.destroy();
 
+    // We are not using LLD at this point, so ensure we set the intermediary basename
+    if (build_options.have_llvm and options.use_llvm and options.module != null) {
+        // TODO this intermediary_basename isn't enough; in the case of `zig build-exe`,
+        // we also want to put the intermediary object file in the cache while the
+        // main emit directory is the cwd.
+        wasm_bin.base.intermediary_basename = try std.fmt.allocPrint(allocator, "{s}{s}", .{
+            options.emit.?.sub_path, options.target.ofmt.fileExt(options.target.cpu.arch),
+        });
+    }
+
     // TODO: read the file and keep valid parts instead of truncating
     const file = try options.emit.?.directory.handle.createFile(sub_path, .{ .truncate = true, .read = true });
     wasm_bin.base.file = file;
@@ -2134,7 +2144,6 @@ pub fn createDebugSectionForIndex(wasm: *Wasm, index: *?u32, name: []const u8) !
     const new_index = @intCast(u32, wasm.segments.items.len);
     index.* = new_index;
     try wasm.appendDummySegment();
-    // _ = index;
 
     const sym_index = wasm.symbols_free_list.popOrNull() orelse idx: {
         const tmp_index = @intCast(u32, wasm.symbols.items.len);
@@ -2202,14 +2211,17 @@ pub fn flush(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) lin
         }
         return;
     }
+
     if (build_options.have_llvm and wasm.base.options.use_lld) {
         return wasm.linkWithLLD(comp, prog_node);
+    } else if (build_options.have_llvm and wasm.base.options.use_llvm and !wasm.base.options.use_lld) {
+        return wasm.linkWithZld(comp, prog_node);
     } else {
         return wasm.flushModule(comp, prog_node);
     }
 }
 
-pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void {
+fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void {
     const tracy = trace(@src());
     defer tracy.end();
@@ -2219,7 +2231,7 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod
         }
     }
 
-    var sub_prog_node = prog_node.start("WASM Flush", 0);
+    var sub_prog_node = prog_node.start("Wasm Flush", 0);
     sub_prog_node.activate();
     defer sub_prog_node.end();
@@ -2726,6 +2738,523 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod
     try wasm.base.file.?.writevAll(&iovec);
 }
 
+pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void {
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    if (build_options.have_llvm) {
+        if (wasm.llvm_object) |llvm_object| {
+            return try llvm_object.flushModule(comp, prog_node);
+        }
+    }
+
+    var sub_prog_node = prog_node.start("Wasm Flush", 0);
+    sub_prog_node.activate();
+    defer sub_prog_node.end();
+
+    // ensure the error names table is populated when an error name is referenced
+    try wasm.populateErrorNameTable();
+
+    // The amount of sections that will be written
+    var section_count: u32 = 0;
+    // Index of the code section. Used to tell relocation table where the section lives.
+    var code_section_index: ?u32 = null;
+    // Index of the data section. Used to tell relocation table where the section lives.
+    var data_section_index: ?u32 = null;
+
+    // Used for all temporary memory allocated during flushing
+    var arena_instance = std.heap.ArenaAllocator.init(wasm.base.allocator);
+    defer arena_instance.deinit();
+    const arena = arena_instance.allocator();
+
+    // Positional arguments to the linker such as object files and static archives.
+    var positionals = std.ArrayList([]const u8).init(arena);
+    try positionals.ensureUnusedCapacity(wasm.base.options.objects.len);
+
+    for (wasm.base.options.objects) |object| {
+        positionals.appendAssumeCapacity(object.path);
+    }
+
+    for (comp.c_object_table.keys()) |c_object| {
+        try positionals.append(c_object.status.success.object_path);
+    }
+
+    if (comp.compiler_rt_lib) |lib| {
+        try positionals.append(lib.full_object_path);
+    }
+
+    try wasm.parseInputFiles(positionals.items);
+
+    for (wasm.objects.items) |_, object_index| {
+        try wasm.resolveSymbolsInObject(@intCast(u16, object_index));
+    }
+
+    var emit_features_count: u32 = 0;
+    var enabled_features: [@typeInfo(types.Feature.Tag).Enum.fields.len]bool = undefined;
+    try wasm.validateFeatures(&enabled_features, &emit_features_count);
+    try wasm.resolveSymbolsInArchives();
+    try wasm.checkUndefinedSymbols();
+
+    // When we finish/error we reset the state of the linker
+    // So we can rebuild the binary file on each incremental update
+    defer wasm.resetState();
+    try wasm.setupStart();
+    try wasm.setupImports();
+    if (wasm.base.options.module) |mod| {
+        var decl_it = wasm.decls.keyIterator();
+        while (decl_it.next()) |decl_index_ptr| {
+            const decl = mod.declPtr(decl_index_ptr.*);
+            if (decl.isExtern()) continue;
+            const atom = &decl.*.link.wasm;
+            if (decl.ty.zigTypeTag() == .Fn) {
+                try wasm.parseAtom(atom, .{ .function = decl.fn_link.wasm });
+            } else if (decl.getVariable()) |variable| {
+                if (!variable.is_mutable) {
+                    try wasm.parseAtom(atom, .{ .data = .read_only });
+                } else if (variable.init.isUndefDeep()) {
+                    try wasm.parseAtom(atom, .{ .data = .uninitialized });
+                } else {
+                    try wasm.parseAtom(atom, .{ .data = .initialized });
+                }
+            } else {
+                try wasm.parseAtom(atom, .{ .data = .read_only });
+            }
+
+            // also parse atoms for a decl's locals
+            for (atom.locals.items) |*local_atom| {
+                try wasm.parseAtom(local_atom, .{ .data = .read_only });
+            }
+        }
+
+        if (wasm.dwarf) |*dwarf| {
+            try dwarf.flushModule(wasm.base.options.module.?);
+        }
+    }
+
+    for (wasm.objects.items) |*object, object_index| {
+        try object.parseIntoAtoms(wasm.base.allocator, @intCast(u16, object_index), wasm);
+    }
+
+    try wasm.allocateAtoms();
+    try wasm.setupMemory();
+    wasm.mapFunctionTable();
+    try wasm.mergeSections();
+    try wasm.mergeTypes();
+    try wasm.setupExports();
+
+    const header_size = 5 + 1;
+    const is_obj = wasm.base.options.output_mode == .Obj or (!wasm.base.options.use_llvm and wasm.base.options.use_lld);
+
+    var binary_bytes = std.ArrayList(u8).init(wasm.base.allocator);
+    defer binary_bytes.deinit();
+    const binary_writer = binary_bytes.writer();
+
+    // We write the magic bytes at the end so they will only be written
+    // if everything succeeded as expected. So populate with 0's for now.
+    try binary_writer.writeAll(&[_]u8{0} ** 8);
+    // (Re)set file pointer to 0
+    try wasm.base.file.?.setEndPos(0);
+    try wasm.base.file.?.seekTo(0);
+
+    // Type section
+    if (wasm.func_types.items.len != 0) {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+        log.debug("Writing type section. Count: ({d})", .{wasm.func_types.items.len});
+        for (wasm.func_types.items) |func_type| {
+            try leb.writeULEB128(binary_writer, std.wasm.function_type);
+            try leb.writeULEB128(binary_writer, @intCast(u32, func_type.params.len));
+            for (func_type.params) |param_ty| {
+                try leb.writeULEB128(binary_writer, std.wasm.valtype(param_ty));
+            }
+            try leb.writeULEB128(binary_writer, @intCast(u32, func_type.returns.len));
+            for (func_type.returns) |ret_ty| {
+                try leb.writeULEB128(binary_writer, std.wasm.valtype(ret_ty));
+            }
+        }
+
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .type,
+            @intCast(u32, binary_bytes.items.len - header_offset - header_size),
+            @intCast(u32, wasm.func_types.items.len),
+        );
+        section_count += 1;
+    }
+
+    // Import section
+    const import_memory = wasm.base.options.import_memory or is_obj;
+    const import_table = wasm.base.options.import_table or is_obj;
+    if (wasm.imports.count() != 0 or import_memory or import_table) {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+
+        // import table is always first table so emit that first
+        if (import_table) {
+            const table_imp: types.Import = .{
+                .module_name = try wasm.string_table.put(wasm.base.allocator, wasm.host_name),
+                .name = try wasm.string_table.put(wasm.base.allocator, "__indirect_function_table"),
+                .kind = .{
+                    .table = .{
+                        .limits = .{
+                            .min = @intCast(u32, wasm.function_table.count()),
+                            .max = null,
+                        },
+                        .reftype = .funcref,
+                    },
+                },
+            };
+            try wasm.emitImport(binary_writer, table_imp);
+        }
+
+        var it = wasm.imports.iterator();
+        while (it.next()) |entry| {
+            assert(entry.key_ptr.*.getSymbol(wasm).isUndefined());
+            const import = entry.value_ptr.*;
+            try wasm.emitImport(binary_writer, import);
+        }
+
+        if (import_memory) {
+            const mem_name = if (is_obj) "__linear_memory" else "memory";
+            const mem_imp: types.Import = .{
+                .module_name = try wasm.string_table.put(wasm.base.allocator, wasm.host_name),
+                .name = try wasm.string_table.put(wasm.base.allocator, mem_name),
+                .kind = .{ .memory = wasm.memories.limits },
+            };
+            try wasm.emitImport(binary_writer, mem_imp);
+        }
+
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .import,
+            @intCast(u32, binary_bytes.items.len - header_offset - header_size),
+            @intCast(u32, wasm.imports.count() + @boolToInt(import_memory) + @boolToInt(import_table)),
+        );
+        section_count += 1;
+    }
+
+    // Function section
+    if (wasm.functions.count() != 0) {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+        for (wasm.functions.values()) |function| {
+            try leb.writeULEB128(binary_writer, function.type_index);
+        }
+
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .function,
+            @intCast(u32, binary_bytes.items.len - header_offset - header_size),
+            @intCast(u32, wasm.functions.count()),
+        );
+        section_count += 1;
+    }
+
+    // Table section
+    const export_table = wasm.base.options.export_table;
+    if (!import_table and wasm.function_table.count() != 0) {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+
+        try leb.writeULEB128(binary_writer, std.wasm.reftype(.funcref));
+        try emitLimits(binary_writer, .{
+            .min = @intCast(u32, wasm.function_table.count()) + 1,
+            .max = null,
+        });
+
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .table,
+            @intCast(u32, binary_bytes.items.len - header_offset - header_size),
+            @as(u32, 1),
+        );
+        section_count += 1;
+    }
+
+    // Memory section
+    if (!import_memory) {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+
+        try emitLimits(binary_writer, wasm.memories.limits);
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .memory,
+            @intCast(u32, binary_bytes.items.len - header_offset - header_size),
+            @as(u32, 1), // wasm currently only supports 1 linear memory segment
+        );
+        section_count += 1;
+    }
+
+    // Global section (used to emit stack pointer)
+    if (wasm.wasm_globals.items.len > 0) {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+
+        for (wasm.wasm_globals.items) |global| {
+            try binary_writer.writeByte(std.wasm.valtype(global.global_type.valtype));
+            try binary_writer.writeByte(@boolToInt(global.global_type.mutable));
+            try emitInit(binary_writer, global.init);
+        }
+
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .global,
+            @intCast(u32, binary_bytes.items.len - header_offset - header_size),
+            @intCast(u32, wasm.wasm_globals.items.len),
+        );
+        section_count += 1;
+    }
+
+    // Export section
+    if (wasm.exports.items.len != 0 or export_table or !import_memory) {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+
+        for (wasm.exports.items) |exp| {
+            const name = wasm.string_table.get(exp.name);
+            try leb.writeULEB128(binary_writer, @intCast(u32, name.len));
+            try binary_writer.writeAll(name);
+            try leb.writeULEB128(binary_writer, @enumToInt(exp.kind));
+            try leb.writeULEB128(binary_writer, exp.index);
+        }
+
+        if (export_table) {
+            try leb.writeULEB128(binary_writer, @intCast(u32, "__indirect_function_table".len));
+            try binary_writer.writeAll("__indirect_function_table");
+            try binary_writer.writeByte(std.wasm.externalKind(.table));
+            try leb.writeULEB128(binary_writer, @as(u32, 0)); // function table is always the first table
+        }
+
+        if (!import_memory) {
+            try leb.writeULEB128(binary_writer, @intCast(u32, "memory".len));
+            try binary_writer.writeAll("memory");
+            try binary_writer.writeByte(std.wasm.externalKind(.memory));
+            try leb.writeULEB128(binary_writer, @as(u32, 0));
+        }
+
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .@"export",
+            @intCast(u32, binary_bytes.items.len - header_offset - header_size),
+            @intCast(u32, wasm.exports.items.len) + @boolToInt(export_table) + @boolToInt(!import_memory),
+        );
+        section_count += 1;
+    }
+
+    // element section (function table)
+    if (wasm.function_table.count() > 0) {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+
+        var flags: u32 = 0x2; // Yes we have a table
+        try leb.writeULEB128(binary_writer, flags);
+        try leb.writeULEB128(binary_writer, @as(u32, 0)); // index of that table. TODO: Store synthetic symbols
+        try emitInit(binary_writer, .{ .i32_const = 1 }); // We start at index 1, so unresolved function pointers are invalid
+        try leb.writeULEB128(binary_writer, @as(u8, 0));
+        try leb.writeULEB128(binary_writer, @intCast(u32, wasm.function_table.count()));
+        var symbol_it = wasm.function_table.keyIterator();
+        while (symbol_it.next()) |symbol_loc_ptr| {
+            try leb.writeULEB128(binary_writer, symbol_loc_ptr.*.getSymbol(wasm).index);
+        }
+
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .element,
+            @intCast(u32, binary_bytes.items.len - header_offset - header_size),
+            @as(u32, 1),
+        );
+        section_count += 1;
+    }
+
+    // Code section
+    var code_section_size: u32 = 0;
+    if (wasm.code_section_index) |code_index| {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+        var atom: *Atom = wasm.atoms.get(code_index).?.getFirst();
+
+        // The code section must be sorted in line with the function order.
+        var sorted_atoms = try std.ArrayList(*Atom).initCapacity(wasm.base.allocator, wasm.functions.count());
+        defer sorted_atoms.deinit();
+
+        while (true) {
+            if (!is_obj) {
+                atom.resolveRelocs(wasm);
+            }
+            sorted_atoms.appendAssumeCapacity(atom);
+            atom = atom.next orelse break;
+        }
+
+        const atom_sort_fn = struct {
+            fn sort(ctx: *const Wasm, lhs: *const Atom, rhs: *const Atom) bool {
+                const lhs_sym = lhs.symbolLoc().getSymbol(ctx);
+                const rhs_sym = rhs.symbolLoc().getSymbol(ctx);
+                return lhs_sym.index < rhs_sym.index;
+            }
+        }.sort;
+
+        std.sort.sort(*Atom, sorted_atoms.items, wasm, atom_sort_fn);
+
+        for (sorted_atoms.items) |sorted_atom| {
+            try leb.writeULEB128(binary_writer, sorted_atom.size);
+            try binary_writer.writeAll(sorted_atom.code.items);
+        }
+
+        code_section_size = @intCast(u32, binary_bytes.items.len - header_offset - header_size);
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .code,
+            code_section_size,
+            @intCast(u32, wasm.functions.count()),
+        );
+        code_section_index = section_count;
+        section_count += 1;
+    }
+
+    // Data section
+    if (wasm.data_segments.count() != 0) {
+        const header_offset = try reserveVecSectionHeader(&binary_bytes);
+
+        var it = wasm.data_segments.iterator();
+        var segment_count: u32 = 0;
+        while (it.next()) |entry| {
+            // do not output 'bss' section unless we import memory and therefore
+            // want to guarantee the data is zero initialized
+            if (!import_memory and std.mem.eql(u8, entry.key_ptr.*, ".bss")) continue;
+            segment_count += 1;
+            const atom_index = entry.value_ptr.*;
+            var atom: *Atom = wasm.atoms.getPtr(atom_index).?.*.getFirst();
+            const segment = wasm.segments.items[atom_index];
+
+            // flag and index to memory section (currently, there can only be 1 memory section in wasm)
+            try leb.writeULEB128(binary_writer, @as(u32, 0));
+            // offset into data section
+            try emitInit(binary_writer, .{ .i32_const = @bitCast(i32, segment.offset) });
+            try leb.writeULEB128(binary_writer, segment.size);
+
+            // fill in the offset table and the data segments
+            var current_offset: u32 = 0;
+            while (true) {
+                if (!is_obj) {
+                    atom.resolveRelocs(wasm);
+                }
+
+                // Pad with zeroes to ensure all segments are aligned
+                if (current_offset != atom.offset) {
+                    const diff = atom.offset - current_offset;
+                    try binary_writer.writeByteNTimes(0, diff);
+                    current_offset += diff;
+                }
+                assert(current_offset == atom.offset);
+                assert(atom.code.items.len == atom.size);
+                try binary_writer.writeAll(atom.code.items);
+
+                current_offset += atom.size;
+                if (atom.next) |next| {
+                    atom = next;
+                } else {
+                    // also pad with zeroes when last atom to ensure
+                    // segments are aligned.
+                    if (current_offset != segment.size) {
+                        try binary_writer.writeByteNTimes(0, segment.size - current_offset);
+                        current_offset += segment.size - current_offset;
+                    }
+                    break;
+                }
+            }
+            assert(current_offset == segment.size);
+        }
+
+        try writeVecSectionHeader(
+            binary_bytes.items,
+            header_offset,
+            .data,
+            @intCast(u32, binary_bytes.items.len - header_offset - header_size),
+            @intCast(u32, segment_count),
+        );
+        data_section_index = section_count;
+        section_count += 1;
+    }
+
+    if (is_obj) {
+        // relocations need to point to the index of a symbol in the final symbol table. To save memory,
+        // we never store all symbols in a single table, but store a location reference instead.
+        // This means that for a relocatable object file, we need to generate one and provide it to the relocation sections.
+        var symbol_table = std.AutoArrayHashMap(SymbolLoc, u32).init(arena);
+        try wasm.emitLinkSection(&binary_bytes, &symbol_table);
+        if (code_section_index) |code_index| {
+            try wasm.emitCodeRelocations(&binary_bytes, code_index, symbol_table);
+        }
+        if (data_section_index) |data_index| {
+            try wasm.emitDataRelocations(&binary_bytes, data_index, symbol_table);
+        }
+    } else if (!wasm.base.options.strip) {
+        try wasm.emitNameSection(&binary_bytes, arena);
+    }
+
+    if (!wasm.base.options.strip) {
+        if (wasm.dwarf) |*dwarf| {
+            const mod = wasm.base.options.module.?;
+            try dwarf.writeDbgAbbrev();
+            // for debug info and ranges, the address is always 0,
+            // as locations are always offsets relative to 'code' section.
+            try dwarf.writeDbgInfoHeader(mod, 0, code_section_size);
+            try dwarf.writeDbgAranges(0, code_section_size);
+            try dwarf.writeDbgLineHeader();
+        }
+
+        var debug_bytes = std.ArrayList(u8).init(wasm.base.allocator);
+        defer debug_bytes.deinit();
+
+        const DebugSection = struct {
+            name: []const u8,
+            index: ?u32,
+        };
+
+        const debug_sections: []const DebugSection = &.{
+            .{ .name = ".debug_info", .index = wasm.debug_info_index },
+            .{ .name = ".debug_pubtypes", .index = wasm.debug_pubtypes_index },
+            .{ .name = ".debug_abbrev", .index = wasm.debug_abbrev_index },
+            .{ .name = ".debug_line", .index = wasm.debug_line_index },
+            .{ .name = ".debug_str", .index = wasm.debug_str_index },
+            .{ .name = ".debug_pubnames", .index = wasm.debug_pubnames_index },
+            .{ .name = ".debug_loc", .index = wasm.debug_loc_index },
+            .{ .name = ".debug_ranges", .index = wasm.debug_ranges_index },
+        };
+
+        for (debug_sections) |item| {
+            if (item.index) |index| {
+                var atom = wasm.atoms.get(index).?.getFirst();
+                while (true) {
+                    atom.resolveRelocs(wasm);
+                    try debug_bytes.appendSlice(atom.code.items);
+                    atom = atom.next orelse break;
+                }
+                try emitDebugSection(&binary_bytes, debug_bytes.items, item.name);
+                debug_bytes.clearRetainingCapacity();
+            }
+        }
+
+        try emitProducerSection(&binary_bytes);
+        if (emit_features_count > 0) {
+            try emitFeaturesSection(&binary_bytes, &enabled_features, emit_features_count);
+        }
+    }
+
+    // Only when writing all sections executed properly we write the magic
+    // bytes. This allows us to easily detect what went wrong while generating
+    // the final binary.
+    mem.copy(u8, binary_bytes.items, &(std.wasm.magic ++ std.wasm.version));
+
+    // finally, write the entire binary into the file.
+    var iovec = [_]std.os.iovec_const{.{
+        .iov_base = binary_bytes.items.ptr,
+        .iov_len = binary_bytes.items.len,
+    }};
+    try wasm.base.file.?.writevAll(&iovec);
+}
+
 fn emitDebugSection(binary_bytes: *std.ArrayList(u8), data: []const u8, name: []const u8) !void {
     if (data.len == 0) return;
     const header_offset = try reserveCustomSectionHeader(binary_bytes);
```
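
A note on the `reserveVecSectionHeader`/`writeVecSectionHeader` pattern used throughout the new `flushModule`: a section header is reserved before its payload is written, then patched once the payload length is known, which works because the size is encoded as a fixed-width ULEB128 (the `header_size = 5 + 1` constant suggests one byte of section id plus five bytes of size). The sketch below illustrates that reserve-then-patch technique with the era-appropriate builtins; the helper names `reserveSectionHeader`/`writeSectionHeader` are hypothetical simplifications, not the actual `Wasm.zig` API:

```zig
const std = @import("std");

// Reserve room for a section header: 1-byte section id + 5-byte fixed-width
// ULEB128 size. Returns the offset where the header will later be patched in.
fn reserveSectionHeader(bytes: *std.ArrayList(u8)) !u32 {
    const offset = @intCast(u32, bytes.items.len);
    try bytes.appendNTimes(0, 1 + 5);
    return offset;
}

// Patch the reserved header once the payload size is known. Encoding the size
// as exactly 5 ULEB128 bytes means the reservation never shifts the payload.
fn writeSectionHeader(bytes: []u8, offset: u32, section_id: u8, size: u32) void {
    bytes[offset] = section_id;
    var value = size;
    var i: usize = 0;
    while (i < 5) : (i += 1) {
        var byte = @truncate(u8, value) & 0x7f;
        value >>= 7;
        if (i != 4) byte |= 0x80; // continuation bit on all but the last byte
        bytes[offset + 1 + i] = byte;
    }
}

test "reserve a section header, then patch it after the payload" {
    var bytes = std.ArrayList(u8).init(std.testing.allocator);
    defer bytes.deinit();

    const header_offset = try reserveSectionHeader(&bytes);
    try bytes.appendSlice(&[_]u8{ 0x01, 0x60 }); // fake payload
    writeSectionHeader(
        bytes.items,
        header_offset,
        0x01, // type section id
        @intCast(u32, bytes.items.len - header_offset - 6),
    );
    try std.testing.expectEqual(@as(u8, 0x01), bytes.items[0]);
}
```

The same trick explains why the module starts as eight zero bytes and only receives `std.wasm.magic ++ std.wasm.version` at the very end: a crash mid-flush leaves a file that is detectably not a valid wasm module.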
