aboutsummaryrefslogtreecommitdiff
path: root/src/link
diff options
context:
space:
mode:
Diffstat (limited to 'src/link')
-rw-r--r--src/link/Elf.zig4
-rw-r--r--src/link/MachO.zig65
-rw-r--r--src/link/MachO/Trie.zig436
3 files changed, 482 insertions, 23 deletions
diff --git a/src/link/Elf.zig b/src/link/Elf.zig
index c62bb29f78..a316a9c19e 100644
--- a/src/link/Elf.zig
+++ b/src/link/Elf.zig
@@ -2588,7 +2588,7 @@ pub fn updateDeclExports(
},
};
const stt_bits: u8 = @truncate(u4, decl_sym.st_info);
- if (exp.link.sym_index) |i| {
+ if (exp.link.elf.sym_index) |i| {
const sym = &self.global_symbols.items[i];
sym.* = .{
.st_name = try self.updateString(sym.st_name, exp.options.name),
@@ -2613,7 +2613,7 @@ pub fn updateDeclExports(
.st_size = decl_sym.st_size,
};
- exp.link.sym_index = @intCast(u32, i);
+ exp.link.elf.sym_index = @intCast(u32, i);
}
}
}
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index a1b9484e13..697e4f0be3 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -20,6 +20,8 @@ const File = link.File;
const Cache = @import("../Cache.zig");
const target_util = @import("../target.zig");
+const Trie = @import("MachO/Trie.zig");
+
pub const base_tag: File.Tag = File.Tag.macho;
const LoadCommand = union(enum) {
@@ -113,6 +115,9 @@ local_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
global_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
/// Table of all undefined symbols
undef_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
+
+global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{},
+
dyld_stub_binder_index: ?u16 = null,
/// Table of symbol names aka the string table.
@@ -176,6 +181,10 @@ pub const TextBlock = struct {
};
};
+pub const Export = struct {
+ sym_index: ?u32 = null,
+};
+
pub const SrcFn = struct {
pub const empty = SrcFn{};
};
@@ -256,10 +265,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
switch (self.base.options.output_mode) {
.Exe => {
- if (self.entry_addr) |addr| {
- // Write export trie.
- try self.writeExportTrie();
+ // Write export trie.
+ try self.writeExportTrie();
+ if (self.entry_addr) |addr| {
// Update LC_MAIN with entry offset
const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
const main_cmd = &self.load_commands.items[self.main_cmd_index.?].EntryPoint;
@@ -711,6 +720,7 @@ pub fn deinit(self: *MachO) void {
self.string_table.deinit(self.base.allocator);
self.undef_symbols.deinit(self.base.allocator);
self.global_symbols.deinit(self.base.allocator);
+ self.global_symbol_free_list.deinit(self.base.allocator);
self.local_symbols.deinit(self.base.allocator);
self.sections.deinit(self.base.allocator);
self.load_commands.deinit(self.base.allocator);
@@ -835,7 +845,7 @@ pub fn updateDeclExports(
},
};
const n_type = decl_sym.n_type | macho.N_EXT;
- if (exp.link.sym_index) |i| {
+ if (exp.link.macho.sym_index) |i| {
const sym = &self.global_symbols.items[i];
sym.* = .{
.n_strx = try self.updateString(sym.n_strx, exp.options.name),
@@ -846,8 +856,10 @@ pub fn updateDeclExports(
};
} else {
const name_str_index = try self.makeString(exp.options.name);
- _ = self.global_symbols.addOneAssumeCapacity();
- const i = self.global_symbols.items.len - 1;
+ const i = if (self.global_symbol_free_list.popOrNull()) |i| i else blk: {
+ _ = self.global_symbols.addOneAssumeCapacity();
+ break :blk self.global_symbols.items.len - 1;
+ };
self.global_symbols.items[i] = .{
.n_strx = name_str_index,
.n_type = n_type,
@@ -856,11 +868,17 @@ pub fn updateDeclExports(
.n_value = decl_sym.n_value,
};
- exp.link.sym_index = @intCast(u32, i);
+ exp.link.macho.sym_index = @intCast(u32, i);
}
}
}
+pub fn deleteExport(self: *MachO, exp: Export) void {
+ const sym_index = exp.sym_index orelse return;
+ self.global_symbol_free_list.append(self.base.allocator, sym_index) catch {};
+ self.global_symbols.items[sym_index].n_type = 0;
+}
+
pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {}
pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 {
@@ -1383,25 +1401,30 @@ fn writeAllUndefSymbols(self: *MachO) !void {
}
fn writeExportTrie(self: *MachO) !void {
- assert(self.entry_addr != null);
+ if (self.global_symbols.items.len == 0) return; // No exports, nothing to do.
- // TODO implement mechanism for generating a prefix tree of the exported symbols
- // single branch export trie
- var buf = [_]u8{0} ** 24;
- buf[0] = 0; // root node
- buf[1] = 1; // 1 branch from root
- mem.copy(u8, buf[2..], "_start");
- buf[8] = 0;
- buf[9] = 9 + 1;
+ var trie: Trie = .{};
+ defer trie.deinit(self.base.allocator);
const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
- const addr = self.entry_addr.? - text_segment.vmaddr;
- const written = try std.debug.leb.writeULEB128Mem(buf[12..], addr);
- buf[10] = @intCast(u8, written) + 1;
- buf[11] = 0;
+ for (self.global_symbols.items) |symbol| {
+ // TODO figure out if we should put all global symbols into the export trie
+ const name = self.getString(symbol.n_strx);
+ assert(symbol.n_value >= text_segment.vmaddr);
+ try trie.put(self.base.allocator, .{
+ .name = name,
+ .vmaddr_offset = symbol.n_value - text_segment.vmaddr,
+ .export_flags = 0, // TODO workout creation of export flags
+ });
+ }
+
+ var buffer: std.ArrayListUnmanaged(u8) = .{};
+ defer buffer.deinit(self.base.allocator);
+
+ try trie.writeULEB128Mem(self.base.allocator, &buffer);
const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfo;
- try self.base.file.?.pwriteAll(buf[0..], dyld_info.export_off);
+ try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off);
}
fn writeStringTable(self: *MachO) !void {
diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig
new file mode 100644
index 0000000000..e077df101d
--- /dev/null
+++ b/src/link/MachO/Trie.zig
@@ -0,0 +1,436 @@
+//! Represents export trie used in MachO executables and dynamic libraries.
+//! The purpose of an export trie is to encode as compactly as possible all
+//! export symbols for the loader `dyld`.
+//! The export trie encodes offset and other information using ULEB128
+//! encoding, and is part of the __LINKEDIT segment.
+//!
+//! Description from loader.h:
+//!
+//! The symbols exported by a dylib are encoded in a trie. This is a compact
+//! representation that factors out common prefixes. It also reduces LINKEDIT pages
+//! in RAM because it encodes all information (name, address, flags) in one small,
+//! contiguous range. The export area is a stream of nodes. The first node sequentially
+//! is the start node for the trie.
+//!
+//! Nodes for a symbol start with a uleb128 that is the length of the exported symbol
+//! information for the string so far. If there is no exported symbol, the node starts
+//! with a zero byte. If there is exported info, it follows the length.
+//!
+//! First is a uleb128 containing flags. Normally, it is followed by a uleb128 encoded
+//! offset which is location of the content named by the symbol from the mach_header
+//! for the image. If the flags is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags
+//! is a uleb128 encoded library ordinal, then a zero terminated UTF8 string. If the string
+//! is zero length, then the symbol is re-export from the specified dylib with the same name.
+//! If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following the flags is two
+//! uleb128s: the stub offset and the resolver offset. The stub is used by non-lazy pointers.
+//! The resolver is used by lazy pointers and must be called to get the actual address to use.
+//!
+//! After the optional exported symbol information is a byte of how many edges (0-255) that
+//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
+//! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to.
+const Trie = @This();
+
+const std = @import("std");
+const mem = std.mem;
+const leb = std.debug.leb;
+const log = std.log.scoped(.link);
+const testing = std.testing;
+const assert = std.debug.assert;
+const Allocator = mem.Allocator;
+
+pub const Symbol = struct {
+ name: []const u8,
+ vmaddr_offset: u64,
+ export_flags: u64,
+};
+
+const Edge = struct {
+ from: *Node,
+ to: *Node,
+ label: []const u8,
+
+ fn deinit(self: *Edge, alloc: *Allocator) void {
+ self.to.deinit(alloc);
+ alloc.destroy(self.to);
+ self.from = undefined;
+ self.to = undefined;
+ }
+};
+
+const Node = struct {
+ /// Export flags associated with this exported symbol (if any).
+ export_flags: ?u64 = null,
+ /// VM address offset wrt to the section this symbol is defined against (if any).
+ vmaddr_offset: ?u64 = null,
+ /// Offset of this node in the trie output byte stream.
+ trie_offset: ?usize = null,
+ /// List of all edges originating from this node.
+ edges: std.ArrayListUnmanaged(Edge) = .{},
+
+ fn deinit(self: *Node, alloc: *Allocator) void {
+ for (self.edges.items) |*edge| {
+ edge.deinit(alloc);
+ }
+ self.edges.deinit(alloc);
+ }
+
+ const PutResult = struct {
+ /// Node reached at this stage of `put` op.
+ node: *Node,
+ /// Count of newly inserted nodes at this stage of `put` op.
+ node_count: usize,
+ };
+
+ /// Inserts a new node starting from `self`.
+ fn put(self: *Node, alloc: *Allocator, label: []const u8, node_count: usize) !PutResult {
+ var curr_node_count = node_count;
+ // Check for match with edges from this node.
+ for (self.edges.items) |*edge| {
+ const match = mem.indexOfDiff(u8, edge.label, label) orelse return PutResult{
+ .node = edge.to,
+ .node_count = curr_node_count,
+ };
+ if (match == 0) continue;
+ if (match == edge.label.len) return edge.to.put(alloc, label[match..], curr_node_count);
+
+ // Found a match, need to splice up nodes.
+ // From: A -> B
+ // To: A -> C -> B
+ const mid = try alloc.create(Node);
+ mid.* = .{};
+ const to_label = edge.label;
+ const to_node = edge.to;
+ edge.to = mid;
+ edge.label = label[0..match];
+ curr_node_count += 1;
+
+ try mid.edges.append(alloc, .{
+ .from = mid,
+ .to = to_node,
+ .label = to_label[match..],
+ });
+
+ if (match == label.len) {
+ return PutResult{ .node = to_node, .node_count = curr_node_count };
+ } else {
+ return mid.put(alloc, label[match..], curr_node_count);
+ }
+ }
+
+ // Add a new node.
+ const node = try alloc.create(Node);
+ node.* = .{};
+ curr_node_count += 1;
+
+ try self.edges.append(alloc, .{
+ .from = self,
+ .to = node,
+ .label = label,
+ });
+
+ return PutResult{ .node = node, .node_count = curr_node_count };
+ }
+
+ /// This method should only be called *after* updateOffset has been called!
+ /// In case this is not upheld, this method will panic.
+ fn writeULEB128Mem(self: Node, buffer: *std.ArrayListUnmanaged(u8)) !void {
+ assert(self.trie_offset != null); // You need to call updateOffset first.
+ if (self.vmaddr_offset) |offset| {
+ // Terminal node info: encode export flags and vmaddr offset of this symbol.
+ var info_buf_len: usize = 0;
+ var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
+ info_buf_len += try leb.writeULEB128Mem(info_buf[0..], self.export_flags.?);
+ info_buf_len += try leb.writeULEB128Mem(info_buf[info_buf_len..], offset);
+
+ // Encode the size of the terminal node info.
+ var size_buf: [@sizeOf(u64)]u8 = undefined;
+ const size_buf_len = try leb.writeULEB128Mem(size_buf[0..], info_buf_len);
+
+ // Now, write them to the output buffer.
+ buffer.appendSliceAssumeCapacity(size_buf[0..size_buf_len]);
+ buffer.appendSliceAssumeCapacity(info_buf[0..info_buf_len]);
+ } else {
+ // Non-terminal node is delimited by 0 byte.
+ buffer.appendAssumeCapacity(0);
+ }
+ // Write number of edges (max legal number of edges is 256).
+ buffer.appendAssumeCapacity(@intCast(u8, self.edges.items.len));
+
+ for (self.edges.items) |edge| {
+ // Write edges labels.
+ buffer.appendSliceAssumeCapacity(edge.label);
+ buffer.appendAssumeCapacity(0);
+
+ var buf: [@sizeOf(u64)]u8 = undefined;
+ const buf_len = try leb.writeULEB128Mem(buf[0..], edge.to.trie_offset.?);
+ buffer.appendSliceAssumeCapacity(buf[0..buf_len]);
+ }
+ }
+
+ const UpdateResult = struct {
+ /// Current size of this node in bytes.
+ node_size: usize,
+ /// True if the trie offset of this node in the output byte stream
+ /// would need updating; false otherwise.
+ updated: bool,
+ };
+
+ /// Updates offset of this node in the output byte stream.
+ fn updateOffset(self: *Node, offset: usize) UpdateResult {
+ var node_size: usize = 0;
+ if (self.vmaddr_offset) |vmaddr| {
+ node_size += sizeULEB128Mem(self.export_flags.?);
+ node_size += sizeULEB128Mem(vmaddr);
+ node_size += sizeULEB128Mem(node_size);
+ } else {
+ node_size += 1; // 0x0 for non-terminal nodes
+ }
+ node_size += 1; // 1 byte for edge count
+
+ for (self.edges.items) |edge| {
+ const next_node_offset = edge.to.trie_offset orelse 0;
+ node_size += edge.label.len + 1 + sizeULEB128Mem(next_node_offset);
+ }
+
+ const trie_offset = self.trie_offset orelse 0;
+ const updated = offset != trie_offset;
+ self.trie_offset = offset;
+
+ return .{ .node_size = node_size, .updated = updated };
+ }
+
+ /// Calculates number of bytes in ULEB128 encoding of value.
+ fn sizeULEB128Mem(value: u64) usize {
+ var res: usize = 0;
+ var v = value;
+ while (true) {
+ v = v >> 7;
+ res += 1;
+ if (v == 0) break;
+ }
+ return res;
+ }
+};
+
+/// Count of nodes in the trie.
+/// The count is updated at every `put` call.
+/// The trie always consists of at least a root node, hence
+/// the count always starts at 1.
+node_count: usize = 1,
+/// The root node of the trie.
+root: Node = .{},
+
+/// Insert a symbol into the trie, updating the prefixes in the process.
+/// This operation may change the layout of the trie by splicing edges in
+/// certain circumstances.
+pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void {
+ const res = try self.root.put(alloc, symbol.name, 0);
+ self.node_count += res.node_count;
+ res.node.vmaddr_offset = symbol.vmaddr_offset;
+ res.node.export_flags = symbol.export_flags;
+}
+
+/// Write the trie to a buffer ULEB128 encoded.
+pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void {
+ var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{};
+ defer ordered_nodes.deinit(alloc);
+
+ try ordered_nodes.ensureCapacity(alloc, self.node_count);
+ walkInOrder(&self.root, &ordered_nodes);
+
+ var offset: usize = 0;
+ var more: bool = true;
+ while (more) {
+ offset = 0;
+ more = false;
+ for (ordered_nodes.items) |node| {
+ const res = node.updateOffset(offset);
+ offset += res.node_size;
+ if (res.updated) more = true;
+ }
+ }
+
+ try buffer.ensureCapacity(alloc, buffer.items.len + offset);
+ for (ordered_nodes.items) |node| {
+ try node.writeULEB128Mem(buffer);
+ }
+}
+
+/// Walks the trie in DFS order gathering all nodes into a linear stream of nodes.
+fn walkInOrder(node: *Node, list: *std.ArrayListUnmanaged(*Node)) void {
+ list.appendAssumeCapacity(node);
+ for (node.edges.items) |*edge| {
+ walkInOrder(edge.to, list);
+ }
+}
+
+pub fn deinit(self: *Trie, alloc: *Allocator) void {
+ self.root.deinit(alloc);
+}
+
+test "Trie node count" {
+ var gpa = testing.allocator;
+ var trie: Trie = .{};
+ defer trie.deinit(gpa);
+
+ testing.expectEqual(trie.node_count, 1);
+
+ try trie.put(gpa, .{
+ .name = "_main",
+ .vmaddr_offset = 0,
+ .export_flags = 0,
+ });
+ testing.expectEqual(trie.node_count, 2);
+
+ // Inserting the same node shouldn't update the trie.
+ try trie.put(gpa, .{
+ .name = "_main",
+ .vmaddr_offset = 0,
+ .export_flags = 0,
+ });
+ testing.expectEqual(trie.node_count, 2);
+
+ try trie.put(gpa, .{
+ .name = "__mh_execute_header",
+ .vmaddr_offset = 0x1000,
+ .export_flags = 0,
+ });
+ testing.expectEqual(trie.node_count, 4);
+
+ // Inserting the same node shouldn't update the trie.
+ try trie.put(gpa, .{
+ .name = "__mh_execute_header",
+ .vmaddr_offset = 0x1000,
+ .export_flags = 0,
+ });
+ testing.expectEqual(trie.node_count, 4);
+ try trie.put(gpa, .{
+ .name = "_main",
+ .vmaddr_offset = 0,
+ .export_flags = 0,
+ });
+ testing.expectEqual(trie.node_count, 4);
+}
+
+test "Trie basic" {
+ var gpa = testing.allocator;
+ var trie: Trie = .{};
+ defer trie.deinit(gpa);
+
+ // root
+ testing.expect(trie.root.edges.items.len == 0);
+
+ // root --- _st ---> node
+ try trie.put(gpa, .{
+ .name = "_st",
+ .vmaddr_offset = 0,
+ .export_flags = 0,
+ });
+ testing.expect(trie.root.edges.items.len == 1);
+ testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st"));
+
+ {
+ // root --- _st ---> node --- art ---> node
+ try trie.put(gpa, .{
+ .name = "_start",
+ .vmaddr_offset = 0,
+ .export_flags = 0,
+ });
+ testing.expect(trie.root.edges.items.len == 1);
+
+ const nextEdge = &trie.root.edges.items[0];
+ testing.expect(mem.eql(u8, nextEdge.label, "_st"));
+ testing.expect(nextEdge.to.edges.items.len == 1);
+ testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art"));
+ }
+ {
+ // root --- _ ---> node --- st ---> node --- art ---> node
+ // |
+ // | --- main ---> node
+ try trie.put(gpa, .{
+ .name = "_main",
+ .vmaddr_offset = 0,
+ .export_flags = 0,
+ });
+ testing.expect(trie.root.edges.items.len == 1);
+
+ const nextEdge = &trie.root.edges.items[0];
+ testing.expect(mem.eql(u8, nextEdge.label, "_"));
+ testing.expect(nextEdge.to.edges.items.len == 2);
+ testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st"));
+ testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main"));
+
+ const nextNextEdge = &nextEdge.to.edges.items[0];
+ testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art"));
+ }
+}
+
+test "Trie.writeULEB128Mem" {
+ var gpa = testing.allocator;
+ var trie: Trie = .{};
+ defer trie.deinit(gpa);
+
+ try trie.put(gpa, .{
+ .name = "__mh_execute_header",
+ .vmaddr_offset = 0,
+ .export_flags = 0,
+ });
+ try trie.put(gpa, .{
+ .name = "_main",
+ .vmaddr_offset = 0x1000,
+ .export_flags = 0,
+ });
+
+ var buffer: std.ArrayListUnmanaged(u8) = .{};
+ defer buffer.deinit(gpa);
+
+ try trie.writeULEB128Mem(gpa, &buffer);
+
+ const exp_buffer = [_]u8{
+ 0x0,
+ 0x1,
+ 0x5f,
+ 0x0,
+ 0x5,
+ 0x0,
+ 0x2,
+ 0x5f,
+ 0x6d,
+ 0x68,
+ 0x5f,
+ 0x65,
+ 0x78,
+ 0x65,
+ 0x63,
+ 0x75,
+ 0x74,
+ 0x65,
+ 0x5f,
+ 0x68,
+ 0x65,
+ 0x61,
+ 0x64,
+ 0x65,
+ 0x72,
+ 0x0,
+ 0x21,
+ 0x6d,
+ 0x61,
+ 0x69,
+ 0x6e,
+ 0x0,
+ 0x25,
+ 0x2,
+ 0x0,
+ 0x0,
+ 0x0,
+ 0x3,
+ 0x0,
+ 0x80,
+ 0x20,
+ 0x0,
+ };
+
+ testing.expect(buffer.items.len == exp_buffer.len);
+ testing.expect(mem.eql(u8, buffer.items, exp_buffer[0..]));
+}