diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2021-07-01 15:11:55 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2021-07-15 18:49:46 +0200 |
| commit | 3622fe08dbdcaccb04204b48257e1d5fcbe0d164 (patch) | |
| tree | 10bac4d011d8ccd13ee3d550e998daa7d1678b2f /src | |
| parent | 9c3ebe0216306b5e346ec52959de41d1b4d504d9 (diff) | |
| download | zig-3622fe08dbdcaccb04204b48257e1d5fcbe0d164.tar.gz zig-3622fe08dbdcaccb04204b48257e1d5fcbe0d164.zip | |
zld: abstract away string table with fewer allocs
Diffstat (limited to 'src')
| -rw-r--r-- | src/link/MachO.zig | 100 | ||||
| -rw-r--r-- | src/link/MachO/DebugSymbols.zig | 14 | ||||
| -rw-r--r-- | src/link/MachO/StringTable.zig | 84 |
3 files changed, 118 insertions, 80 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f6ff03e92b..59cfab4292 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -26,6 +26,7 @@ const target_util = @import("../target.zig"); const DebugSymbols = @import("MachO/DebugSymbols.zig"); const Trie = @import("MachO/Trie.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); +const StringTable = @import("MachO/StringTable.zig"); const Zld = @import("MachO/Zld.zig"); usingnamespace @import("MachO/commands.zig"); @@ -116,9 +117,7 @@ offset_table_free_list: std.ArrayListUnmanaged(u32) = .{}, stub_helper_stubs_start_off: ?u64 = null, -/// Table of symbol names aka the string table. -string_table: std.ArrayListUnmanaged(u8) = .{}, -string_table_directory: std.StringHashMapUnmanaged(u32) = .{}, +strtab: StringTable = undefined, /// Table of GOT entries. offset_table: std.ArrayListUnmanaged(GOTEntry) = .{}, @@ -131,9 +130,9 @@ rebase_info_dirty: bool = false, binding_info_dirty: bool = false, lazy_binding_info_dirty: bool = false, export_info_dirty: bool = false, -string_table_dirty: bool = false, -string_table_needs_relocation: bool = false, +strtab_dirty: bool = false, +strtab_needs_relocation: bool = false, /// A list of text blocks that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added @@ -413,6 +412,7 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio pub fn createEmpty(gpa: *Allocator, options: link.Options) !*MachO { const self = try gpa.create(MachO); + self.* = .{ .base = .{ .tag = .macho, @@ -421,7 +421,9 @@ pub fn createEmpty(gpa: *Allocator, options: link.Options) !*MachO { .file = null, }, .page_size = if (options.target.cpu.arch == .aarch64) 0x4000 else 0x1000, + .strtab = try StringTable.init(gpa), }; + return self; } @@ -499,8 +501,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { assert(!self.binding_info_dirty); assert(!self.lazy_binding_info_dirty); assert(!self.export_info_dirty); - assert(!self.string_table_dirty); - assert(!self.string_table_needs_relocation); + assert(!self.strtab_dirty); + assert(!self.strtab_needs_relocation); if (target.cpu.arch == .aarch64) { switch (output_mode) { @@ -977,14 +979,7 @@ pub fn deinit(self: *MachO) void { self.text_block_free_list.deinit(self.base.allocator); self.offset_table.deinit(self.base.allocator); self.offset_table_free_list.deinit(self.base.allocator); - { - var it = self.string_table_directory.keyIterator(); - while (it.next()) |key| { - self.base.allocator.free(key.*); - } - } - self.string_table_directory.deinit(self.base.allocator); - self.string_table.deinit(self.base.allocator); + self.strtab.deinit(); self.globals.deinit(self.base.allocator); self.globals_free_list.deinit(self.base.allocator); self.locals.deinit(self.base.allocator); @@ -1202,7 +1197,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const new_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); defer self.base.allocator.free(new_name); - symbol.n_strx = try self.updateString(symbol.n_strx, new_name); + symbol.n_strx = try self.strtab.getOrPut(new_name); symbol.n_type = macho.N_SECT; symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1; symbol.n_desc = 0; @@ -1214,7 +1209,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const decl_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); defer self.base.allocator.free(decl_name); - const name_str_index = try self.makeString(decl_name); + const name_str_index = try self.strtab.getOrPut(decl_name); const addr = try self.allocateTextBlock(&decl.link.macho, code.len, required_alignment); log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, addr }); @@ -1404,14 +1399,14 @@ pub fn updateDeclExports( if (exp.link.macho.sym_index) |i| { const sym = &self.globals.items[i]; sym.* = .{ - .n_strx = try self.updateString(sym.n_strx, exp_name), + .n_strx = try self.strtab.getOrPut(exp_name), .n_type = n_type, .n_sect = @intCast(u8, self.text_section_index.?) + 1, .n_desc = n_desc, .n_value = decl_sym.n_value, }; } else { - const name_str_index = try self.makeString(exp_name); + const name_str_index = try self.strtab.getOrPut(exp_name); const i = if (self.globals_free_list.popOrNull()) |i| i else blk: { _ = self.globals.addOneAssumeCapacity(); self.export_info_dirty = true; @@ -1787,15 +1782,14 @@ pub fn populateMissingMetadata(self: *MachO) !void { symtab.symoff = @intCast(u32, symtab_off); symtab.nsyms = @intCast(u32, self.base.options.symbol_count_hint); - try self.string_table.append(self.base.allocator, 0); // Need a null at position 0. - const strtab_size = self.string_table.items.len; + const strtab_size = self.strtab.size(); const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1, symtab_off); log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size }); symtab.stroff = @intCast(u32, strtab_off); symtab.strsize = @intCast(u32, strtab_size); self.load_commands_dirty = true; - self.string_table_dirty = true; + self.strtab_dirty = true; } if (self.dysymtab_cmd_index == null) { self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -1930,7 +1924,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (!self.nonlazy_imports.contains("dyld_stub_binder")) { const index = @intCast(u32, self.nonlazy_imports.count()); const name = try self.base.allocator.dupe(u8, "dyld_stub_binder"); - const offset = try self.makeString("dyld_stub_binder"); + const offset = try self.strtab.getOrPut("dyld_stub_binder"); try self.nonlazy_imports.putNoClobber(self.base.allocator, name, .{ .symbol = .{ .n_strx = offset, @@ -2061,49 +2055,9 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, return vaddr; } -fn makeString(self: *MachO, bytes: []const u8) !u32 { - if (self.string_table_directory.get(bytes)) |offset| { - log.debug("reusing '{s}' from string table at offset 0x{x}", .{ bytes, offset }); - return offset; - } - - try self.string_table.ensureCapacity(self.base.allocator, self.string_table.items.len + bytes.len + 1); - const offset = @intCast(u32, self.string_table.items.len); - - log.debug("writing new string '{s}' into string table at offset 0x{x}", .{ bytes, offset }); - - self.string_table.appendSliceAssumeCapacity(bytes); - self.string_table.appendAssumeCapacity(0); - - try self.string_table_directory.putNoClobber( - self.base.allocator, - try self.base.allocator.dupe(u8, bytes), - offset, - ); - - self.string_table_dirty = true; - if (self.d_sym) |*ds| - ds.string_table_dirty = true; - - return offset; -} - -fn getString(self: *MachO, str_off: u32) []const u8 { - assert(str_off < self.string_table.items.len); - return mem.spanZ(@ptrCast([*:0]const u8, self.string_table.items.ptr + str_off)); -} - -fn updateString(self: *MachO, old_str_off: u32, new_name: []const u8) !u32 { - const existing_name = self.getString(old_str_off); - if (mem.eql(u8, existing_name, new_name)) { - return old_str_off; - } - return self.makeString(new_name); -} - pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 { const index = @intCast(u32, self.lazy_imports.count()); - const offset = try self.makeString(name); + const offset = try self.strtab.getOrPut(name); const sym_name = try self.base.allocator.dupe(u8, name); const dylib_ordinal = 1; // TODO this is now hardcoded, since we only support libSystem. try self.lazy_imports.putNoClobber(self.base.allocator, sym_name, .{ @@ -2293,7 +2247,7 @@ fn writeOffsetTableEntry(self: *MachO, index: usize) !void { }, } }; - const sym_name = self.getString(sym.n_strx); + const sym_name = self.strtab.get(sym.n_strx) orelse unreachable; log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ off, sym.n_value, sym_name }); try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); } @@ -2592,7 +2546,7 @@ fn relocateSymbolTable(self: *MachO) !void { const amt = try self.base.file.?.copyRangeAll(symtab.symoff, self.base.file.?, new_symoff, existing_size); if (amt != existing_size) return error.InputOutput; symtab.symoff = @intCast(u32, new_symoff); - self.string_table_needs_relocation = true; + self.strtab_needs_relocation = true; } symtab.nsyms = @intCast(u32, nsyms); self.load_commands_dirty = true; @@ -2791,7 +2745,7 @@ fn writeExportTrie(self: *MachO) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; for (self.globals.items) |symbol| { // TODO figure out if we should put all global symbols into the export trie - const name = self.getString(symbol.n_strx); + const name = self.strtab.get(symbol.n_strx) orelse unreachable; assert(symbol.n_value >= text_segment.inner.vmaddr); try trie.put(.{ .name = name, @@ -3065,26 +3019,26 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } fn writeStringTable(self: *MachO) !void { - if (!self.string_table_dirty) return; + if (!self.strtab_dirty) return; const tracy = trace(@src()); defer tracy.end(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.string_table.items.len, @alignOf(u64)); + const needed_size = mem.alignForwardGeneric(u64, self.strtab.size(), @alignOf(u64)); - if (needed_size > allocated_size or self.string_table_needs_relocation) { + if (needed_size > allocated_size or self.strtab_needs_relocation) { symtab.strsize = 0; symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, symtab.symoff)); - self.string_table_needs_relocation = false; + self.strtab_needs_relocation = false; } symtab.strsize = @intCast(u32, needed_size); log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.base.file.?.pwriteAll(self.string_table.items, symtab.stroff); + try self.base.file.?.pwriteAll(self.strtab.asSlice(), symtab.stroff); self.load_commands_dirty = true; - self.string_table_dirty = false; + self.strtab_dirty = false; } fn updateLinkeditSegmentSizes(self: *MachO) !void { diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 62c2faad4b..38e13800a6 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -76,7 +76,7 @@ dbg_info_decl_last: ?*TextBlock = null, debug_string_table: std.ArrayListUnmanaged(u8) = .{}, load_commands_dirty: bool = false, -string_table_dirty: bool = false, +strtab_dirty: bool = false, debug_string_table_dirty: bool = false, debug_abbrev_section_dirty: bool = false, debug_aranges_section_dirty: bool = false, @@ -131,7 +131,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void }, }); self.load_commands_dirty = true; - self.string_table_dirty = true; + self.strtab_dirty = true; } if (self.pagezero_segment_cmd_index == null) { self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -593,7 +593,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt try self.writeHeader(); assert(!self.load_commands_dirty); - assert(!self.string_table_dirty); + assert(!self.strtab_dirty); assert(!self.debug_abbrev_section_dirty); assert(!self.debug_aranges_section_dirty); assert(!self.debug_string_table_dirty); @@ -807,14 +807,14 @@ pub fn writeLocalSymbol(self: *DebugSymbols, index: usize) !void { } fn writeStringTable(self: *DebugSymbols) !void { - if (!self.string_table_dirty) return; + if (!self.strtab_dirty) return; const tracy = trace(@src()); defer tracy.end(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.base.string_table.items.len, @alignOf(u64)); + const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.size(), @alignOf(u64)); if (needed_size > allocated_size) { symtab.strsize = 0; @@ -823,9 +823,9 @@ fn writeStringTable(self: *DebugSymbols) !void { symtab.strsize = @intCast(u32, needed_size); log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.file.pwriteAll(self.base.string_table.items, symtab.stroff); + try self.file.pwriteAll(self.base.strtab.asSlice(), symtab.stroff); self.load_commands_dirty = true; - self.string_table_dirty = false; + self.strtab_dirty = false; } pub fn updateDeclLineNumber(self: *DebugSymbols, module: *Module, decl: *const Module.Decl) !void { diff --git a/src/link/MachO/StringTable.zig b/src/link/MachO/StringTable.zig new file mode 100644 index 0000000000..5437c70476 --- /dev/null +++ b/src/link/MachO/StringTable.zig @@ -0,0 +1,84 @@ +const StringTable = @This(); + +const std = @import("std"); +const log = std.log.scoped(.strtab); +const mem = std.mem; + +const Allocator = mem.Allocator; + +allocator: *Allocator, +buffer: std.ArrayListUnmanaged(u8) = .{}, +used_offsets: std.ArrayListUnmanaged(u32) = .{}, +cache: std.StringHashMapUnmanaged(u32) = .{}, + +pub const Error = error{OutOfMemory}; + +pub fn init(allocator: *Allocator) Error!StringTable { + var strtab = StringTable{ + .allocator = allocator, + }; + try strtab.buffer.append(allocator, 0); + return strtab; +} + +pub fn deinit(self: *StringTable) void { + self.cache.deinit(self.allocator); + self.used_offsets.deinit(self.allocator); + self.buffer.deinit(self.allocator); +} + +pub fn getOrPut(self: *StringTable, string: []const u8) Error!u32 { + if (self.cache.get(string)) |off| { + log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); + return off; + } + + const invalidate_cache = self.needsToGrow(string.len + 1); + + try self.buffer.ensureUnusedCapacity(self.allocator, string.len + 1); + const new_off = @intCast(u32, self.buffer.items.len); + + log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); + + self.buffer.appendSliceAssumeCapacity(string); + self.buffer.appendAssumeCapacity(0); + + if (invalidate_cache) { + log.debug("invalidating cache", .{}); + // Re-create the cache. + self.cache.clearRetainingCapacity(); + for (self.used_offsets.items) |off| { + try self.cache.putNoClobber(self.allocator, self.get(off).?, off); + } + } + + { + log.debug("cache:", .{}); + var it = self.cache.iterator(); + while (it.next()) |entry| { + log.debug(" | {s} => {}", .{ entry.key_ptr.*, entry.value_ptr.* }); + } + } + + try self.cache.putNoClobber(self.allocator, self.get(new_off).?, new_off); + try self.used_offsets.append(self.allocator, new_off); + + return new_off; +} + +pub fn get(self: StringTable, off: u32) ?[]const u8 { + if (off >= self.buffer.items.len) return null; + return mem.spanZ(@ptrCast([*:0]const u8, self.buffer.items.ptr + off)); +} + +pub fn asSlice(self: StringTable) []const u8 { + return self.buffer.items; +} + +pub fn size(self: StringTable) u64 { + return self.buffer.items.len; +} + +fn needsToGrow(self: StringTable, needed_space: u64) bool { + return self.buffer.capacity < needed_space + self.size(); +} |
