diff options
Diffstat (limited to 'src/link')
| -rw-r--r-- | src/link/Elf.zig | 4 | ||||
| -rw-r--r-- | src/link/MachO.zig | 65 | ||||
| -rw-r--r-- | src/link/MachO/Trie.zig | 436 |
3 files changed, 482 insertions, 23 deletions
diff --git a/src/link/Elf.zig b/src/link/Elf.zig index c62bb29f78..a316a9c19e 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -2588,7 +2588,7 @@ pub fn updateDeclExports( }, }; const stt_bits: u8 = @truncate(u4, decl_sym.st_info); - if (exp.link.sym_index) |i| { + if (exp.link.elf.sym_index) |i| { const sym = &self.global_symbols.items[i]; sym.* = .{ .st_name = try self.updateString(sym.st_name, exp.options.name), @@ -2613,7 +2613,7 @@ pub fn updateDeclExports( .st_size = decl_sym.st_size, }; - exp.link.sym_index = @intCast(u32, i); + exp.link.elf.sym_index = @intCast(u32, i); } } } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a1b9484e13..697e4f0be3 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -20,6 +20,8 @@ const File = link.File; const Cache = @import("../Cache.zig"); const target_util = @import("../target.zig"); +const Trie = @import("MachO/Trie.zig"); + pub const base_tag: File.Tag = File.Tag.macho; const LoadCommand = union(enum) { @@ -113,6 +115,9 @@ local_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, global_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, /// Table of all undefined symbols undef_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, + +global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{}, + dyld_stub_binder_index: ?u16 = null, /// Table of symbol names aka the string table. @@ -176,6 +181,10 @@ pub const TextBlock = struct { }; }; +pub const Export = struct { + sym_index: ?u32 = null, +}; + pub const SrcFn = struct { pub const empty = SrcFn{}; }; @@ -256,10 +265,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { switch (self.base.options.output_mode) { .Exe => { - if (self.entry_addr) |addr| { - // Write export trie. - try self.writeExportTrie(); + // Write export trie. + try self.writeExportTrie(); + if (self.entry_addr) |addr| { // Update LC_MAIN with entry offset const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const main_cmd = &self.load_commands.items[self.main_cmd_index.?].EntryPoint; @@ -711,6 +720,7 @@ pub fn deinit(self: *MachO) void { self.string_table.deinit(self.base.allocator); self.undef_symbols.deinit(self.base.allocator); self.global_symbols.deinit(self.base.allocator); + self.global_symbol_free_list.deinit(self.base.allocator); self.local_symbols.deinit(self.base.allocator); self.sections.deinit(self.base.allocator); self.load_commands.deinit(self.base.allocator); @@ -835,7 +845,7 @@ pub fn updateDeclExports( }, }; const n_type = decl_sym.n_type | macho.N_EXT; - if (exp.link.sym_index) |i| { + if (exp.link.macho.sym_index) |i| { const sym = &self.global_symbols.items[i]; sym.* = .{ .n_strx = try self.updateString(sym.n_strx, exp.options.name), @@ -846,8 +856,10 @@ pub fn updateDeclExports( }; } else { const name_str_index = try self.makeString(exp.options.name); - _ = self.global_symbols.addOneAssumeCapacity(); - const i = self.global_symbols.items.len - 1; + const i = if (self.global_symbol_free_list.popOrNull()) |i| i else blk: { + _ = self.global_symbols.addOneAssumeCapacity(); + break :blk self.global_symbols.items.len - 1; + }; self.global_symbols.items[i] = .{ .n_strx = name_str_index, .n_type = n_type, @@ -856,11 +868,17 @@ pub fn updateDeclExports( .n_value = decl_sym.n_value, }; - exp.link.sym_index = @intCast(u32, i); + exp.link.macho.sym_index = @intCast(u32, i); } } } +pub fn deleteExport(self: *MachO, exp: Export) void { + const sym_index = exp.sym_index orelse return; + self.global_symbol_free_list.append(self.base.allocator, sym_index) catch {}; + self.global_symbols.items[sym_index].n_type = 0; +} + pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {} pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 { @@ -1383,25 +1401,30 @@ fn writeAllUndefSymbols(self: *MachO) !void { } fn writeExportTrie(self: *MachO) !void { - assert(self.entry_addr != null); + if (self.global_symbols.items.len == 0) return; // No exports, nothing to do. - // TODO implement mechanism for generating a prefix tree of the exported symbols - // single branch export trie - var buf = [_]u8{0} ** 24; - buf[0] = 0; // root node - buf[1] = 1; // 1 branch from root - mem.copy(u8, buf[2..], "_start"); - buf[8] = 0; - buf[9] = 9 + 1; + var trie: Trie = .{}; + defer trie.deinit(self.base.allocator); const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const addr = self.entry_addr.? - text_segment.vmaddr; - const written = try std.debug.leb.writeULEB128Mem(buf[12..], addr); - buf[10] = @intCast(u8, written) + 1; - buf[11] = 0; + for (self.global_symbols.items) |symbol| { + // TODO figure out if we should put all global symbols into the export trie + const name = self.getString(symbol.n_strx); + assert(symbol.n_value >= text_segment.vmaddr); + try trie.put(self.base.allocator, .{ + .name = name, + .vmaddr_offset = symbol.n_value - text_segment.vmaddr, + .export_flags = 0, // TODO workout creation of export flags + }); + } + + var buffer: std.ArrayListUnmanaged(u8) = .{}; + defer buffer.deinit(self.base.allocator); + + try trie.writeULEB128Mem(self.base.allocator, &buffer); const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfo; - try self.base.file.?.pwriteAll(buf[0..], dyld_info.export_off); + try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off); } fn writeStringTable(self: *MachO) !void { diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig new file mode 100644 index 0000000000..e077df101d --- /dev/null +++ b/src/link/MachO/Trie.zig @@ -0,0 +1,436 @@ +//! Represents export trie used in MachO executables and dynamic libraries. +//! The purpose of an export trie is to encode as compactly as possible all +//! export symbols for the loader `dyld`. +//! The export trie encodes offset and other information using ULEB128 +//! encoding, and is part of the __LINKEDIT segment. +//! +//! Description from loader.h: +//! +//! The symbols exported by a dylib are encoded in a trie. This is a compact +//! representation that factors out common prefixes. It also reduces LINKEDIT pages +//! in RAM because it encodes all information (name, address, flags) in one small, +//! contiguous range. The export area is a stream of nodes. The first node sequentially +//! is the start node for the trie. +//! +//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol +//! information for the string so far. If there is no exported symbol, the node starts +//! with a zero byte. If there is exported info, it follows the length. +//! +//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded +//! offset which is location of the content named by the symbol from the mach_header +//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags +//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string +//! is zero length, then the symbol is re-export from the specified dylib with the same name. +//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two +//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers. +//! The resolver is used by lazy pointers and must be called to get the actual address to use. +//! +//! After the optional exported symbol information is a byte of how many edges (0-255) that +//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of +//! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. +const Trie = @This(); + +const std = @import("std"); +const mem = std.mem; +const leb = std.debug.leb; +const log = std.log.scoped(.link); +const testing = std.testing; +const assert = std.debug.assert; +const Allocator = mem.Allocator; + +pub const Symbol = struct { + name: []const u8, + vmaddr_offset: u64, + export_flags: u64, +}; + +const Edge = struct { + from: *Node, + to: *Node, + label: []const u8, + + fn deinit(self: *Edge, alloc: *Allocator) void { + self.to.deinit(alloc); + alloc.destroy(self.to); + self.from = undefined; + self.to = undefined; + } +}; + +const Node = struct { + /// Export flags associated with this exported symbol (if any). + export_flags: ?u64 = null, + /// VM address offset wrt to the section this symbol is defined against (if any). + vmaddr_offset: ?u64 = null, + /// Offset of this node in the trie output byte stream. + trie_offset: ?usize = null, + /// List of all edges originating from this node. + edges: std.ArrayListUnmanaged(Edge) = .{}, + + fn deinit(self: *Node, alloc: *Allocator) void { + for (self.edges.items) |*edge| { + edge.deinit(alloc); + } + self.edges.deinit(alloc); + } + + const PutResult = struct { + /// Node reached at this stage of `put` op. + node: *Node, + /// Count of newly inserted nodes at this stage of `put` op. + node_count: usize, + }; + + /// Inserts a new node starting from `self`. + fn put(self: *Node, alloc: *Allocator, label: []const u8, node_count: usize) !PutResult { + var curr_node_count = node_count; + // Check for match with edges from this node. + for (self.edges.items) |*edge| { + const match = mem.indexOfDiff(u8, edge.label, label) orelse return PutResult{ + .node = edge.to, + .node_count = curr_node_count, + }; + if (match == 0) continue; + if (match == edge.label.len) return edge.to.put(alloc, label[match..], curr_node_count); + + // Found a match, need to splice up nodes. + // From: A -> B + // To: A -> C -> B + const mid = try alloc.create(Node); + mid.* = .{}; + const to_label = edge.label; + const to_node = edge.to; + edge.to = mid; + edge.label = label[0..match]; + curr_node_count += 1; + + try mid.edges.append(alloc, .{ + .from = mid, + .to = to_node, + .label = to_label[match..], + }); + + if (match == label.len) { + return PutResult{ .node = to_node, .node_count = curr_node_count }; + } else { + return mid.put(alloc, label[match..], curr_node_count); + } + } + + // Add a new node. + const node = try alloc.create(Node); + node.* = .{}; + curr_node_count += 1; + + try self.edges.append(alloc, .{ + .from = self, + .to = node, + .label = label, + }); + + return PutResult{ .node = node, .node_count = curr_node_count }; + } + + /// This method should only be called *after* updateOffset has been called! + /// In case this is not upheld, this method will panic. + fn writeULEB128Mem(self: Node, buffer: *std.ArrayListUnmanaged(u8)) !void { + assert(self.trie_offset != null); // You need to call updateOffset first. + if (self.vmaddr_offset) |offset| { + // Terminal node info: encode export flags and vmaddr offset of this symbol. + var info_buf_len: usize = 0; + var info_buf: [@sizeOf(u64) * 2]u8 = undefined; + info_buf_len += try leb.writeULEB128Mem(info_buf[0..], self.export_flags.?); + info_buf_len += try leb.writeULEB128Mem(info_buf[info_buf_len..], offset); + + // Encode the size of the terminal node info. + var size_buf: [@sizeOf(u64)]u8 = undefined; + const size_buf_len = try leb.writeULEB128Mem(size_buf[0..], info_buf_len); + + // Now, write them to the output buffer. + buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]); + buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]); + } else { + // Non-terminal node is delimited by 0 byte. + buffer.appendAssumeCapacity(0); + } + // Write number of edges (max legal number of edges is 256). + buffer.appendAssumeCapacity(@intCast(u8, self.edges.items.len)); + + for (self.edges.items) |edge| { + // Write edges labels. + buffer.appendSliceAssumeCapacity(edge.label); + buffer.appendAssumeCapacity(0); + + var buf: [@sizeOf(u64)]u8 = undefined; + const buf_len = try leb.writeULEB128Mem(buf[0..], edge.to.trie_offset.?); + buffer.appendSliceAssumeCapacity(buf[0..buf_len]); + } + } + + const UpdateResult = struct { + /// Current size of this node in bytes. + node_size: usize, + /// True if the trie offset of this node in the output byte stream + /// would need updating; false otherwise. + updated: bool, + }; + + /// Updates offset of this node in the output byte stream. + fn updateOffset(self: *Node, offset: usize) UpdateResult { + var node_size: usize = 0; + if (self.vmaddr_offset) |vmaddr| { + node_size += sizeULEB128Mem(self.export_flags.?); + node_size += sizeULEB128Mem(vmaddr); + node_size += sizeULEB128Mem(node_size); + } else { + node_size += 1; // 0x0 for non-terminal nodes + } + node_size += 1; // 1 byte for edge count + + for (self.edges.items) |edge| { + const next_node_offset = edge.to.trie_offset orelse 0; + node_size += edge.label.len + 1 + sizeULEB128Mem(next_node_offset); + } + + const trie_offset = self.trie_offset orelse 0; + const updated = offset != trie_offset; + self.trie_offset = offset; + + return .{ .node_size = node_size, .updated = updated }; + } + + /// Calculates number of bytes in ULEB128 encoding of value. + fn sizeULEB128Mem(value: u64) usize { + var res: usize = 0; + var v = value; + while (true) { + v = v >> 7; + res += 1; + if (v == 0) break; + } + return res; + } +}; + +/// Count of nodes in the trie. +/// The count is updated at every `put` call. +/// The trie always consists of at least a root node, hence +/// the count always starts at 1. +node_count: usize = 1, +/// The root node of the trie. +root: Node = .{}, + +/// Insert a symbol into the trie, updating the prefixes in the process. +/// This operation may change the layout of the trie by splicing edges in +/// certain circumstances. +pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void { + const res = try self.root.put(alloc, symbol.name, 0); + self.node_count += res.node_count; + res.node.vmaddr_offset = symbol.vmaddr_offset; + res.node.export_flags = symbol.export_flags; +} + +/// Write the trie to a buffer ULEB128 encoded. +pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void { + var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}; + defer ordered_nodes.deinit(alloc); + + try ordered_nodes.ensureCapacity(alloc, self.node_count); + walkInOrder(&self.root, &ordered_nodes); + + var offset: usize = 0; + var more: bool = true; + while (more) { + offset = 0; + more = false; + for (ordered_nodes.items) |node| { + const res = node.updateOffset(offset); + offset += res.node_size; + if (res.updated) more = true; + } + } + + try buffer.ensureCapacity(alloc, buffer.items.len + offset); + for (ordered_nodes.items) |node| { + try node.writeULEB128Mem(buffer); + } +} + +/// Walks the trie in DFS order gathering all nodes into a linear stream of nodes. +fn walkInOrder(node: *Node, list: *std.ArrayListUnmanaged(*Node)) void { + list.appendAssumeCapacity(node); + for (node.edges.items) |*edge| { + walkInOrder(edge.to, list); + } +} + +pub fn deinit(self: *Trie, alloc: *Allocator) void { + self.root.deinit(alloc); +} + +test "Trie node count" { + var gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + + testing.expectEqual(trie.node_count, 1); + + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 2); + + // Inserting the same node shouldn't update the trie. + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 2); + + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 4); + + // Inserting the same node shouldn't update the trie. + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 4); + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + testing.expectEqual(trie.node_count, 4); +} + +test "Trie basic" { + var gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + + // root + testing.expect(trie.root.edges.items.len == 0); + + // root --- _st ---> node + try trie.put(gpa, .{ + .name = "_st", + .vmaddr_offset = 0, + .export_flags = 0, + }); + testing.expect(trie.root.edges.items.len == 1); + testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st")); + + { + // root --- _st ---> node --- art ---> node + try trie.put(gpa, .{ + .name = "_start", + .vmaddr_offset = 0, + .export_flags = 0, + }); + testing.expect(trie.root.edges.items.len == 1); + + const nextEdge = &trie.root.edges.items[0]; + testing.expect(mem.eql(u8, nextEdge.label, "_st")); + testing.expect(nextEdge.to.edges.items.len == 1); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); + } + { + // root --- _ ---> node --- st ---> node --- art ---> node + // | + // | --- main ---> node + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0, + .export_flags = 0, + }); + testing.expect(trie.root.edges.items.len == 1); + + const nextEdge = &trie.root.edges.items[0]; + testing.expect(mem.eql(u8, nextEdge.label, "_")); + testing.expect(nextEdge.to.edges.items.len == 2); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); + testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main")); + + const nextNextEdge = &nextEdge.to.edges.items[0]; + testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art")); + } +} + +test "Trie.writeULEB128Mem" { + var gpa = testing.allocator; + var trie: Trie = .{}; + defer trie.deinit(gpa); + + try trie.put(gpa, .{ + .name = "__mh_execute_header", + .vmaddr_offset = 0, + .export_flags = 0, + }); + try trie.put(gpa, .{ + .name = "_main", + .vmaddr_offset = 0x1000, + .export_flags = 0, + }); + + var buffer: std.ArrayListUnmanaged(u8) = .{}; + defer buffer.deinit(gpa); + + try trie.writeULEB128Mem(gpa, &buffer); + + const exp_buffer = [_]u8{ + 0x0, + 0x1, + 0x5f, + 0x0, + 0x5, + 0x0, + 0x2, + 0x5f, + 0x6d, + 0x68, + 0x5f, + 0x65, + 0x78, + 0x65, + 0x63, + 0x75, + 0x74, + 0x65, + 0x5f, + 0x68, + 0x65, + 0x61, + 0x64, + 0x65, + 0x72, + 0x0, + 0x21, + 0x6d, + 0x61, + 0x69, + 0x6e, + 0x0, + 0x25, + 0x2, + 0x0, + 0x0, + 0x0, + 0x3, + 0x0, + 0x80, + 0x20, + 0x0, + }; + + testing.expect(buffer.items.len == exp_buffer.len); + testing.expect(mem.eql(u8, buffer.items, exp_buffer[0..])); +} |
