//! Represents the export trie used in MachO executables and dynamic libraries.
//! The purpose of an export trie is to encode as compactly as possible all
//! export symbols for the loader `dyld`.
//! The export trie encodes offset and other information using ULEB128
//! encoding, and is part of the __LINKEDIT segment.
//!
//! Description from loader.h:
//!
//! The symbols exported by a dylib are encoded in a trie. This is a compact
//! representation that factors out common prefixes. It also reduces LINKEDIT pages
//! in RAM because it encodes all information (name, address, flags) in one small,
//! contiguous range. The export area is a stream of nodes. The first node sequentially
//! is the start node for the trie.
//!
//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol
//! information for the string so far. If there is no exported symbol, the node starts
//! with a zero byte. If there is exported info, it follows the length.
//!
//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
//! offset which is location of the content named by the symbol from the mach_header
//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
//! is zero length, then the symbol is re-export from the specified dylib with the same name.
//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two
//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
//! The resolver is used by lazy pointers and must be called to get the actual address to use.
//!
//! After the optional exported symbol information is a byte of how many edges (0-255) that
//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
//! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to.

/// The root node of the trie.
root: ?Node.Index = null,
buffer: std.ArrayList(u8) = .empty,
nodes: std.MultiArrayList(Node) = .{},
edges: std.ArrayList(Edge) = .empty,

/// Insert a symbol into the trie, updating the prefixes in the process.
/// This operation may change the layout of the trie by splicing edges in
/// certain circumstances.
fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void {
    // const tracy = trace(@src());
    // defer tracy.end();
    const node_index = try self.putNode(self.root.?, allocator, symbol.name);
    const slice = self.nodes.slice();
    slice.items(.is_terminal)[node_index] = true;
    slice.items(.vmaddr_offset)[node_index] = symbol.vmaddr_offset;
    slice.items(.export_flags)[node_index] = symbol.export_flags;
}

/// Inserts a new node starting at `node_index`.
fn putNode(self: *Trie, node_index: Node.Index, allocator: Allocator, label: []const u8) !Node.Index {
    // Check for match with edges from this node.
    for (self.nodes.items(.edges)[node_index].items) |edge_index| {
        const edge = &self.edges.items[edge_index];
        const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.node;
        if (match == 0) continue;
        if (match == edge.label.len) return self.putNode(edge.node, allocator, label[match..]);

        // Found a match, need to splice up nodes.
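        // At this point the new label diverges from this edge's label after `match`
        // characters (the shared prefix is shorter than the edge's label), so the
        // edge has to be split at the point of divergence.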
        // From: A -> B
        // To:   A -> C -> B
        const mid_index = try self.addNode(allocator);
        const to_label = edge.label[match..];
        const to_node = edge.node;
        edge.node = mid_index;
        edge.label = label[0..match];

        const new_edge_index = try self.addEdge(allocator);
        const new_edge = &self.edges.items[new_edge_index];
        new_edge.node = to_node;
        new_edge.label = to_label;
        try self.nodes.items(.edges)[mid_index].append(allocator, new_edge_index);

        return if (match == label.len)
            mid_index
        else
            self.putNode(mid_index, allocator, label[match..]);
    }

    // Add a new node.
    const new_node_index = try self.addNode(allocator);
    const new_edge_index = try self.addEdge(allocator);
    const new_edge = &self.edges.items[new_edge_index];
    new_edge.node = new_node_index;
    new_edge.label = label;
    try self.nodes.items(.edges)[node_index].append(allocator, new_edge_index);

    return new_node_index;
}

pub fn updateSize(self: *Trie, macho_file: *MachO) !void {
    const tracy = trace(@src());
    defer tracy.end();

    const gpa = macho_file.base.comp.gpa;
    try self.init(gpa);
    try self.nodes.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2);
    try self.edges.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2);

    const seg = macho_file.getTextSegment();

    for (macho_file.resolver.values.items) |ref| {
        if (ref.getFile(macho_file) == null) continue;
        const sym = ref.getSymbol(macho_file).?;
        if (!sym.flags.@"export") continue;
        if (sym.getAtom(macho_file)) |atom| if (!atom.isAlive()) continue;
        var flags: u64 = if (sym.flags.abs)
            macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE
        else if (sym.flags.tlv)
            macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL
        else
            macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR;
        if (sym.flags.weak) {
            flags |= macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
            macho_file.weak_defines.store(true, .seq_cst);
            macho_file.binds_to_weak.store(true, .seq_cst);
        }
        try self.put(gpa, .{
            .name = sym.getName(macho_file),
            .vmaddr_offset = sym.getAddress(.{ .stubs = false }, macho_file) - seg.vmaddr,
            .export_flags = flags,
        });
    }

    try self.finalize(gpa);

    macho_file.dyld_info_cmd.export_size = mem.alignForward(u32, @intCast(self.buffer.items.len), @alignOf(u64));
}

/// Finalizes this trie for writing to a byte stream.
/// This step performs multiple passes through the trie ensuring
/// there are no gaps after every `Node` is ULEB128 encoded.
/// Call this method before trying to `write` the trie to a byte stream.
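///
/// Multiple passes are needed because child offsets are themselves ULEB128
/// encoded: growing an offset can grow the size of the node that references it,
/// which shifts every node that follows, so the layout is recomputed until no
/// node's offset changes.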
fn finalize(self: *Trie, allocator: Allocator) !void {
    const tracy = trace(@src());
    defer tracy.end();

    var ordered_nodes = std.array_list.Managed(Node.Index).init(allocator);
    defer ordered_nodes.deinit();
    try ordered_nodes.ensureTotalCapacityPrecise(self.nodes.items(.is_terminal).len);

    {
        var fifo: std.ArrayList(Node.Index) = .empty;
        defer fifo.deinit(allocator);

        try fifo.append(allocator, self.root.?);

        var i: usize = 0;
        while (i < fifo.items.len) {
            const next_index = fifo.items[i];
            i += 1;
            const edges = &self.nodes.items(.edges)[next_index];
            for (edges.items) |edge_index| {
                const edge = self.edges.items[edge_index];
                try fifo.append(allocator, edge.node);
            }
            ordered_nodes.appendAssumeCapacity(next_index);
        }
    }

    var more: bool = true;
    var size: u32 = 0;
    while (more) {
        size = 0;
        more = false;
        for (ordered_nodes.items) |node_index| {
            const res = try self.finalizeNode(node_index, size);
            size += res.node_size;
            if (res.updated) more = true;
        }
    }

    try self.buffer.ensureTotalCapacityPrecise(allocator, size);
    var allocating: std.Io.Writer.Allocating = .fromArrayList(allocator, &self.buffer);
    defer self.buffer = allocating.toArrayList();
    const writer = &allocating.writer;
    for (ordered_nodes.items) |node_index| {
        try self.writeNode(node_index, writer);
    }
}

const FinalizeNodeResult = struct {
    /// Current size of this node in bytes.
    node_size: u32,

    /// True if the trie offset of this node in the output byte stream
    /// would need updating; false otherwise.
    updated: bool,
};

/// Updates offset of this node in the output byte stream.
fn finalizeNode(self: *Trie, node_index: Node.Index, offset_in_trie: u32) !FinalizeNodeResult {
    var trash_buffer: [64]u8 = undefined;
    var stream: std.Io.Writer.Discarding = .init(&trash_buffer);
    const writer = &stream.writer;
    const slice = self.nodes.slice();

    var node_size: u32 = 0;
    if (slice.items(.is_terminal)[node_index]) {
        const export_flags = slice.items(.export_flags)[node_index];
        const vmaddr_offset = slice.items(.vmaddr_offset)[node_index];
        try writer.writeUleb128(export_flags);
        try writer.writeUleb128(vmaddr_offset);
        try writer.writeUleb128(stream.fullCount());
    } else {
        node_size += 1; // 0x0 for non-terminal nodes
    }
    node_size += 1; // 1 byte for edge count

    for (slice.items(.edges)[node_index].items) |edge_index| {
        const edge = &self.edges.items[edge_index];
        const next_node_offset = slice.items(.trie_offset)[edge.node];
        node_size += @intCast(edge.label.len + 1);
        try writer.writeUleb128(next_node_offset);
    }

    const trie_offset = slice.items(.trie_offset)[node_index];
    const updated = offset_in_trie != trie_offset;
    slice.items(.trie_offset)[node_index] = offset_in_trie;
    node_size += @intCast(stream.fullCount());

    return .{ .node_size = node_size, .updated = updated };
}

fn init(self: *Trie, allocator: Allocator) !void {
    assert(self.root == null);
    self.root = try self.addNode(allocator);
}

pub fn deinit(self: *Trie, allocator: Allocator) void {
    for (self.nodes.items(.edges)) |*edges| {
        edges.deinit(allocator);
    }
    self.nodes.deinit(allocator);
    self.edges.deinit(allocator);
    self.buffer.deinit(allocator);
}

pub fn write(self: Trie, writer: *std.Io.Writer) !void {
    if (self.buffer.items.len == 0) return;
    try writer.writeAll(self.buffer.items);
}

/// Writes this node to a byte stream.
/// The children of this node are *not* written to the byte stream
/// recursively. To write all nodes to a byte stream in sequence,
/// iterate over the nodes in the breadth-first order established by
/// `finalize` and call this method on each node.
/// This is one of the requirements of the MachO format.
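///
/// Layout of a single node in the output stream:
///   [ULEB128 size of the terminal info (a single 0 byte for non-terminal nodes)]
///   [terminal info: ULEB128 export flags, ULEB128 vmaddr offset]
///   [edge count byte]
///   [per edge: NUL-terminated label, ULEB128 offset of the target node]
///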
/// Panics if `finalize` was not called before calling this method.
fn writeNode(self: *Trie, node_index: Node.Index, writer: *std.Io.Writer) !void {
    const slice = self.nodes.slice();
    const edges = slice.items(.edges)[node_index];
    const is_terminal = slice.items(.is_terminal)[node_index];
    const export_flags = slice.items(.export_flags)[node_index];
    const vmaddr_offset = slice.items(.vmaddr_offset)[node_index];

    if (is_terminal) {
        // Terminal node info: encode export flags and vmaddr offset of this symbol.
        var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
        var info_stream: std.Io.Writer = .fixed(&info_buf);
        // TODO Implement for special flags.
        assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
            export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
        try info_stream.writeUleb128(export_flags);
        try info_stream.writeUleb128(vmaddr_offset);
        // Encode the size of the terminal node info.
        var size_buf: [@sizeOf(u64)]u8 = undefined;
        var size_stream: std.Io.Writer = .fixed(&size_buf);
        try size_stream.writeUleb128(info_stream.end);
        // Now, write them to the output stream.
        try writer.writeAll(size_buf[0..size_stream.end]);
        try writer.writeAll(info_buf[0..info_stream.end]);
    } else {
        // Non-terminal node is delimited by 0 byte.
        try writer.writeByte(0);
    }
    // Write number of edges (max legal number of edges is 255).
    try writer.writeByte(@as(u8, @intCast(edges.items.len)));

    for (edges.items) |edge_index| {
        const edge = self.edges.items[edge_index];
        // Write edge label and offset to next node in trie.
        try writer.writeAll(edge.label);
        try writer.writeByte(0);
        try writer.writeUleb128(slice.items(.trie_offset)[edge.node]);
    }
}

fn addNode(self: *Trie, allocator: Allocator) !Node.Index {
    const index: Node.Index = @intCast(try self.nodes.addOne(allocator));
    self.nodes.set(index, .{});
    return index;
}

fn addEdge(self: *Trie, allocator: Allocator) !Edge.Index {
    const index: Edge.Index = @intCast(self.edges.items.len);
    const edge = try self.edges.addOne(allocator);
    edge.* = .{};
    return index;
}

/// Export symbol that is to be placed in the trie.
pub const ExportSymbol = struct {
    /// Name of the symbol.
    name: []const u8,

    /// Offset of this symbol's virtual memory address from the beginning
    /// of the __TEXT segment.
    vmaddr_offset: u64,

    /// Export flags of this exported symbol.
    export_flags: u64,
};

const Node = struct {
    is_terminal: bool = false,

    /// Export flags associated with this exported symbol.
    export_flags: u64 = 0,

    /// VM address offset with respect to the section this symbol is defined against.
    vmaddr_offset: u64 = 0,

    /// Offset of this node in the trie output byte stream.
    trie_offset: u32 = 0,

    /// List of all edges originating from this node.
    edges: std.ArrayList(Edge.Index) = .empty,

    const Index = u32;
};

/// Edge connecting nodes in the trie.
const Edge = struct {
    /// Target node in the trie.
    node: Node.Index = 0,

    /// Matching prefix.
    label: []const u8 = "",

    const Index = u32;
};

fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void {
    assert(expected.len > 0);
    if (mem.eql(u8, expected, given)) return;
    const expected_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{expected});
    defer testing.allocator.free(expected_fmt);
    const given_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{given});
    defer testing.allocator.free(given_fmt);
    const idx = mem.indexOfDiff(u8, expected_fmt, given_fmt).?;
    const padding = try testing.allocator.alloc(u8, idx + 5);
    defer testing.allocator.free(padding);
    @memset(padding, ' ');
    std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ expected_fmt, given_fmt, padding });
    return error.TestFailed;
}

test "write Trie to a byte stream" {
    const gpa = testing.allocator;
    var trie: Trie = .{};
    defer trie.deinit(gpa);
    try trie.init(gpa);

    try trie.put(gpa, .{
        .name = "__mh_execute_header",
        .vmaddr_offset = 0,
        .export_flags = 0,
    });
    try trie.put(gpa, .{
        .name = "_main",
        .vmaddr_offset = 0x1000,
        .export_flags = 0,
    });

    try trie.finalize(gpa);

    const exp_buffer = [_]u8{
        0x0, 0x1, // node root
        0x5f, 0x0, 0x5, // edge '_'
        0x0, 0x2, // non-terminal node
        0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header'
        0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header'
        0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main'
        0x2, 0x0, 0x0, 0x0, // terminal node
        0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
    };
    try expectEqualHexStrings(&exp_buffer, trie.buffer.items);
}

test "ordering bug" {
    const gpa = testing.allocator;
    var trie: Trie = .{};
    defer trie.deinit(gpa);
    try trie.init(gpa);

    try trie.put(gpa, .{
        .name = "_asStr",
        .vmaddr_offset = 0x558,
        .export_flags = 0,
    });
    try trie.put(gpa, .{
        .name = "_a",
        .vmaddr_offset = 0x8008,
        .export_flags = 0,
    });

    try trie.finalize(gpa);

    const exp_buffer = [_]u8{
        0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00, 0x88,
        0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72, 0x00, 0x12,
        0x03, 0x00, 0xD8, 0x0A, 0x00,
    };
    try expectEqualHexStrings(&exp_buffer, trie.buffer.items);
}

const assert = std.debug.assert;
const log = std.log.scoped(.macho);
const macho = std.macho;
const mem = std.mem;
const std = @import("std");
const testing = std.testing;
const trace = @import("../../../tracy.zig").trace;

const Allocator = mem.Allocator;
const MachO = @import("../../MachO.zig");
const Trie = @This();
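
// The test below is an illustrative sketch (not part of the original test suite).
// It exercises the edge-splicing path in `putNode`: inserting a symbol whose name
// is a strict prefix of an existing edge label splits that edge in two. The symbol
// names and offsets are arbitrary, and only node/edge counts and labels are
// checked, not the encoded bytes.
test "edge splicing on shared prefix" {
    const gpa = testing.allocator;
    var trie: Trie = .{};
    defer trie.deinit(gpa);
    try trie.init(gpa);

    try trie.put(gpa, .{
        .name = "_ab",
        .vmaddr_offset = 0x10,
        .export_flags = 0,
    });
    // "_a" diverges from the existing "_ab" edge after two characters, forcing a
    // splice: root -> "_ab" -> terminal becomes root -> "_a" -> "b" -> terminal.
    try trie.put(gpa, .{
        .name = "_a",
        .vmaddr_offset = 0x20,
        .export_flags = 0,
    });

    // Three nodes (root, the spliced-in mid node, the original terminal), two edges.
    try testing.expectEqual(@as(usize, 3), trie.nodes.len);
    try testing.expectEqual(@as(usize, 2), trie.edges.items.len);

    // The root's single outgoing edge now carries the shared prefix "_a".
    const root_edges = trie.nodes.items(.edges)[trie.root.?];
    try testing.expectEqual(@as(usize, 1), root_edges.items.len);
    try testing.expectEqualStrings("_a", trie.edges.items[root_edges.items[0]].label);
}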