From 4c3e6c5bff967388dddc7ec352017c7b712d9f06 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 8 Dec 2020 16:52:50 +0100 Subject: macho: cleanup export trie generation and parsing Now, ExportTrie is becoming usable for larger linking contexts such as linking in multiple object files, or relinking dylibs, etc. --- src/link/MachO.zig | 66 ++++++++--- src/link/MachO/Trie.zig | 303 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 254 insertions(+), 115 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a2925b3b6b..153f47c340 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -754,13 +754,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64); const needed_size = @sizeOf(macho.linkedit_data_command); if (needed_size + after_last_cmd_offset > text_section.offset) { - // TODO We are in the position to be able to increase the padding by moving all sections - // by the required offset, but this requires a little bit more thinking and bookkeeping. - // For now, return an error informing the user of the problem. - log.err("Not enough padding between load commands and start of __text section:\n", .{}); - log.err("Offset after last load command: 0x{x}\n", .{after_last_cmd_offset}); - log.err("Beginning of __text section: 0x{x}\n", .{text_section.offset}); - log.err("Needed size: 0x{x}\n", .{needed_size}); + std.log.err("Unable to extend padding between load commands and start of __text section.", .{}); + std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size * alloc_num / alloc_den}); + std.log.err("fall back to the system linker.", .{}); return error.NotEnoughPadding; } const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; @@ -1799,38 +1795,36 @@ fn writeCodeSignature(self: *MachO) !void { fn writeExportTrie(self: *MachO) !void { if (self.global_symbols.items.len == 0) return; - var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); + var trie = Trie.init(self.base.allocator); + defer trie.deinit(); const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; for (self.global_symbols.items) |symbol| { // TODO figure out if we should put all global symbols into the export trie const name = self.getString(symbol.n_strx); assert(symbol.n_value >= text_segment.inner.vmaddr); - try trie.put(self.base.allocator, .{ + try trie.put(.{ .name = name, .vmaddr_offset = symbol.n_value - text_segment.inner.vmaddr, .export_flags = 0, // TODO workout creation of export flags }); } - var buffer: std.ArrayListUnmanaged(u8) = .{}; - defer buffer.deinit(self.base.allocator); - - try trie.writeULEB128Mem(self.base.allocator, &buffer); + var buffer = try trie.writeULEB128Mem(); + defer self.base.allocator.free(buffer); const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const export_size = @intCast(u32, mem.alignForward(buffer.items.len, @sizeOf(u64))); + const export_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); dyld_info.export_off = self.linkedit_segment_next_offset.?; dyld_info.export_size = export_size; log.debug("writing export trie from 0x{x} to 0x{x}\n", .{ dyld_info.export_off, dyld_info.export_off + export_size }); - if (export_size > buffer.items.len) { + if (export_size > buffer.len) { // Pad out to align(8). 
try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.export_off + export_size); } - try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off); + try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); self.linkedit_segment_next_offset = dyld_info.export_off + dyld_info.export_size; // Advance size of __LINKEDIT segment @@ -1917,7 +1911,9 @@ fn parseFromFile(self: *MachO, file: fs.File) !void { switch (cmd.cmd()) { macho.LC_SEGMENT_64 => { const x = cmd.Segment; - if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) { + if (isSegmentOrSection(&x.inner.segname, "__PAGEZERO")) { + self.pagezero_segment_cmd_index = i; + } else if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) { self.linkedit_segment_cmd_index = i; } else if (isSegmentOrSection(&x.inner.segname, "__TEXT")) { self.text_segment_cmd_index = i; @@ -1926,16 +1922,48 @@ fn parseFromFile(self: *MachO, file: fs.File) !void { self.text_section_index = @intCast(u16, j); } } + } else if (isSegmentOrSection(&x.inner.segname, "__DATA")) { + self.data_segment_cmd_index = i; } }, + macho.LC_DYLD_INFO_ONLY => { + self.dyld_info_cmd_index = i; + }, macho.LC_SYMTAB => { self.symtab_cmd_index = i; }, + macho.LC_DYSYMTAB => { + self.dysymtab_cmd_index = i; + }, + macho.LC_LOAD_DYLINKER => { + self.dylinker_cmd_index = i; + }, + macho.LC_VERSION_MIN_MACOSX, macho.LC_VERSION_MIN_IPHONEOS, macho.LC_VERSION_MIN_WATCHOS, macho.LC_VERSION_MIN_TVOS => { + self.version_min_cmd_index = i; + }, + macho.LC_SOURCE_VERSION => { + self.source_version_cmd_index = i; + }, + macho.LC_MAIN => { + self.main_cmd_index = i; + }, + macho.LC_LOAD_DYLIB => { + self.libsystem_cmd_index = i; // TODO This is incorrect, but we'll fixup later. + }, + macho.LC_FUNCTION_STARTS => { + self.function_starts_cmd_index = i; + }, + macho.LC_DATA_IN_CODE => { + self.data_in_code_cmd_index = i; + }, macho.LC_CODE_SIGNATURE => { self.code_signature_cmd_index = i; }, // TODO populate more MachO fields - else => {}, + else => { + std.log.err("Unknown load command detected: 0x{x}.", .{cmd.cmd()}); + return error.UnknownLoadCommand; + }, } self.load_commands.appendAssumeCapacity(cmd); } diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index 34ce4e99b9..cdc6581a06 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -44,20 +44,23 @@ pub const Symbol = struct { export_flags: u64, }; -const Edge = struct { +pub const Edge = struct { from: *Node, to: *Node, - label: []const u8, + label: []u8, - fn deinit(self: *Edge, alloc: *Allocator) void { - self.to.deinit(alloc); - alloc.destroy(self.to); + fn deinit(self: *Edge, allocator: *Allocator) void { + self.to.deinit(); + allocator.destroy(self.to); + allocator.free(self.label); self.from = undefined; self.to = undefined; + self.label = undefined; } }; -const Node = struct { +pub const Node = struct { + base: *Trie, /// Export flags associated with this exported symbol (if any). export_flags: ?u64 = null, /// VM address offset wrt to the section this symbol is defined against (if any). @@ -67,73 +70,97 @@ const Node = struct { /// List of all edges originating from this node. edges: std.ArrayListUnmanaged(Edge) = .{}, - fn deinit(self: *Node, alloc: *Allocator) void { + fn deinit(self: *Node) void { for (self.edges.items) |*edge| { - edge.deinit(alloc); + edge.deinit(self.base.allocator); } - self.edges.deinit(alloc); + self.edges.deinit(self.base.allocator); } - const PutResult = struct { - /// Node reached at this stage of `put` op. 
- node: *Node, - /// Count of newly inserted nodes at this stage of `put` op. - node_count: usize, - }; - /// Inserts a new node starting from `self`. - fn put(self: *Node, alloc: *Allocator, label: []const u8, node_count: usize) !PutResult { - var curr_node_count = node_count; + fn put(self: *Node, label: []const u8) !*Node { // Check for match with edges from this node. for (self.edges.items) |*edge| { - const match = mem.indexOfDiff(u8, edge.label, label) orelse return PutResult{ - .node = edge.to, - .node_count = curr_node_count, - }; + const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; if (match == 0) continue; - if (match == edge.label.len) return edge.to.put(alloc, label[match..], curr_node_count); + if (match == edge.label.len) return edge.to.put(label[match..]); // Found a match, need to splice up nodes. // From: A -> B // To: A -> C -> B - const mid = try alloc.create(Node); - mid.* = .{}; - const to_label = edge.label; + const mid = try self.base.allocator.create(Node); + mid.* = .{ .base = self.base }; + var to_label = try self.base.allocator.dupe(u8, edge.label[match..]); + self.base.allocator.free(edge.label); const to_node = edge.to; edge.to = mid; - edge.label = label[0..match]; - curr_node_count += 1; + edge.label = try self.base.allocator.dupe(u8, label[0..match]); + self.base.node_count += 1; - try mid.edges.append(alloc, .{ + try mid.edges.append(self.base.allocator, .{ .from = mid, .to = to_node, - .label = to_label[match..], + .label = to_label, }); - if (match == label.len) { - return PutResult{ .node = to_node, .node_count = curr_node_count }; - } else { - return mid.put(alloc, label[match..], curr_node_count); - } + return if (match == label.len) to_node else mid.put(label[match..]); } // Add a new node. - const node = try alloc.create(Node); - node.* = .{}; - curr_node_count += 1; + const node = try self.base.allocator.create(Node); + node.* = .{ .base = self.base }; + self.base.node_count += 1; - try self.edges.append(alloc, .{ + try self.edges.append(self.base.allocator, .{ .from = self, .to = node, - .label = label, + .label = try self.base.allocator.dupe(u8, label), }); - return PutResult{ .node = node, .node_count = curr_node_count }; + return node; + } + + fn fromByteStream(self: *Node, stream: anytype) Trie.FromByteStreamError!void { + self.trie_offset = try stream.getPos(); + var reader = stream.reader(); + const node_size = try leb.readULEB128(u64, reader); + if (node_size > 0) { + self.export_flags = try leb.readULEB128(u64, reader); + // TODO Parse flags. + self.vmaddr_offset = try leb.readULEB128(u64, reader); + } + const nedges = try reader.readByte(); + self.base.node_count += nedges; + var i: usize = 0; + while (i < nedges) : (i += 1) { + var label = blk: { + var label_buf = std.ArrayList(u8).init(self.base.allocator); + while (true) { + const next = try reader.readByte(); + if (next == @as(u8, 0)) + break; + try label_buf.append(next); + } + break :blk label_buf.toOwnedSlice(); + }; + const seek_to = try leb.readULEB128(u64, reader); + const cur_pos = try stream.getPos(); + try stream.seekTo(seek_to); + var node = try self.base.allocator.create(Node); + node.* = .{ .base = self.base }; + try node.fromByteStream(stream); + try self.edges.append(self.base.allocator, .{ + .from = self, + .to = node, + .label = label, + }); + try stream.seekTo(cur_pos); + } } /// This method should only be called *after* updateOffset has been called! /// In case this is not upheld, this method will panic. 
- fn writeULEB128Mem(self: Node, buffer: *std.ArrayListUnmanaged(u8)) !void { + fn writeULEB128Mem(self: Node, buffer: *std.ArrayList(u8)) !void { assert(self.trie_offset != null); // You need to call updateOffset first. if (self.vmaddr_offset) |offset| { // Terminal node info: encode export flags and vmaddr offset of this symbol. @@ -221,64 +248,95 @@ const Node = struct { /// the count always starts at 1. node_count: usize = 1, /// The root node of the trie. -root: Node = .{}, +root: ?Node = null, +allocator: *Allocator, + +pub fn init(allocator: *Allocator) Trie { + return .{ .allocator = allocator }; +} /// Insert a symbol into the trie, updating the prefixes in the process. /// This operation may change the layout of the trie by splicing edges in /// certain circumstances. -pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void { - const res = try self.root.put(alloc, symbol.name, 0); - self.node_count += res.node_count; - res.node.vmaddr_offset = symbol.vmaddr_offset; - res.node.export_flags = symbol.export_flags; +pub fn put(self: *Trie, symbol: Symbol) !void { + if (self.root == null) { + self.root = .{ .base = self }; + } + const node = try self.root.?.put(symbol.name); + node.vmaddr_offset = symbol.vmaddr_offset; + node.export_flags = symbol.export_flags; } -/// Write the trie to a buffer ULEB128 encoded. -pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void { - var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}; - defer ordered_nodes.deinit(alloc); +const FromByteStreamError = error{ + OutOfMemory, + EndOfStream, + Overflow, +}; - try ordered_nodes.ensureCapacity(alloc, self.node_count); - walkInOrder(&self.root, &ordered_nodes); +/// Parse the trie from a byte stream. +pub fn fromByteStream(self: *Trie, stream: anytype) FromByteStreamError!void { + if (self.root == null) { + self.root = .{ .base = self }; + } + return self.root.?.fromByteStream(stream); +} + +/// Write the trie to a buffer ULEB128 encoded. +/// Caller owns the memory and needs to free it. +pub fn writeULEB128Mem(self: *Trie) ![]u8 { + var ordered_nodes = try self.nodes(); + defer self.allocator.free(ordered_nodes); var offset: usize = 0; var more: bool = true; while (more) { offset = 0; more = false; - for (ordered_nodes.items) |node| { + for (ordered_nodes) |node| { const res = node.updateOffset(offset); offset += res.node_size; if (res.updated) more = true; } } - try buffer.ensureCapacity(alloc, buffer.items.len + offset); - for (ordered_nodes.items) |node| { - try node.writeULEB128Mem(buffer); + var buffer = std.ArrayList(u8).init(self.allocator); + try buffer.ensureCapacity(offset); + for (ordered_nodes) |node| { + try node.writeULEB128Mem(&buffer); } + return buffer.toOwnedSlice(); } -/// Walks the trie in DFS order gathering all nodes into a linear stream of nodes. 
-fn walkInOrder(node: *Node, list: *std.ArrayListUnmanaged(*Node)) void { - list.appendAssumeCapacity(node); - for (node.edges.items) |*edge| { - walkInOrder(edge.to, list); +pub fn nodes(self: *Trie) ![]*Node { + var ordered_nodes = std.ArrayList(*Node).init(self.allocator); + try ordered_nodes.ensureCapacity(self.node_count); + + comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) }); + var fifo = Fifo.init(); + try fifo.writeItem(&self.root.?); + + while (fifo.readItem()) |next| { + for (next.edges.items) |*edge| { + try fifo.writeItem(edge.to); + } + ordered_nodes.appendAssumeCapacity(next); } + + return ordered_nodes.toOwnedSlice(); } -pub fn deinit(self: *Trie, alloc: *Allocator) void { - self.root.deinit(alloc); +pub fn deinit(self: *Trie) void { + self.root.?.deinit(); } test "Trie node count" { var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); + var trie = Trie.init(gpa); + defer trie.deinit(); testing.expectEqual(trie.node_count, 1); - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0, .export_flags = 0, @@ -286,14 +344,14 @@ test "Trie node count" { testing.expectEqual(trie.node_count, 2); // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0, .export_flags = 0, }); testing.expectEqual(trie.node_count, 2); - try trie.put(gpa, .{ + try trie.put(.{ .name = "__mh_execute_header", .vmaddr_offset = 0x1000, .export_flags = 0, @@ -301,13 +359,13 @@ test "Trie node count" { testing.expectEqual(trie.node_count, 4); // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ + try trie.put(.{ .name = "__mh_execute_header", .vmaddr_offset = 0x1000, .export_flags = 0, }); testing.expectEqual(trie.node_count, 4); - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0, .export_flags = 0, @@ -317,31 +375,28 @@ test "Trie node count" { test "Trie basic" { var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - - // root - testing.expect(trie.root.edges.items.len == 0); + var trie = Trie.init(gpa); + defer trie.deinit(); // root --- _st ---> node - try trie.put(gpa, .{ + try trie.put(.{ .name = "_st", .vmaddr_offset = 0, .export_flags = 0, }); - testing.expect(trie.root.edges.items.len == 1); - testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st")); + testing.expect(trie.root.?.edges.items.len == 1); + testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st")); { // root --- _st ---> node --- art ---> node - try trie.put(gpa, .{ + try trie.put(.{ .name = "_start", .vmaddr_offset = 0, .export_flags = 0, }); - testing.expect(trie.root.edges.items.len == 1); + testing.expect(trie.root.?.edges.items.len == 1); - const nextEdge = &trie.root.edges.items[0]; + const nextEdge = &trie.root.?.edges.items[0]; testing.expect(mem.eql(u8, nextEdge.label, "_st")); testing.expect(nextEdge.to.edges.items.len == 1); testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); @@ -350,14 +405,14 @@ test "Trie basic" { // root --- _ ---> node --- st ---> node --- art ---> node // | // | --- main ---> node - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0, .export_flags = 0, }); - testing.expect(trie.root.edges.items.len == 1); + testing.expect(trie.root.?.edges.items.len == 1); - const nextEdge = &trie.root.edges.items[0]; + const nextEdge = &trie.root.?.edges.items[0]; testing.expect(mem.eql(u8, nextEdge.label, "_")); 
testing.expect(nextEdge.to.edges.items.len == 2); testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); @@ -370,24 +425,22 @@ test "Trie basic" { test "Trie.writeULEB128Mem" { var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); + var trie = Trie.init(gpa); + defer trie.deinit(); - try trie.put(gpa, .{ + try trie.put(.{ .name = "__mh_execute_header", .vmaddr_offset = 0, .export_flags = 0, }); - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0x1000, .export_flags = 0, }); - var buffer: std.ArrayListUnmanaged(u8) = .{}; - defer buffer.deinit(gpa); - - try trie.writeULEB128Mem(gpa, &buffer); + var buffer = try trie.writeULEB128Mem(); + defer gpa.free(buffer); const exp_buffer = [_]u8{ 0x0, @@ -434,6 +487,64 @@ test "Trie.writeULEB128Mem" { 0x0, }; - testing.expect(buffer.items.len == exp_buffer.len); - testing.expect(mem.eql(u8, buffer.items, exp_buffer[0..])); + testing.expect(buffer.len == exp_buffer.len); + testing.expect(mem.eql(u8, buffer, exp_buffer[0..])); +} + +test "parse Trie from byte stream" { + var gpa = testing.allocator; + + const in_buffer = [_]u8{ + 0x0, + 0x1, + 0x5f, + 0x0, + 0x5, + 0x0, + 0x2, + 0x5f, + 0x6d, + 0x68, + 0x5f, + 0x65, + 0x78, + 0x65, + 0x63, + 0x75, + 0x74, + 0x65, + 0x5f, + 0x68, + 0x65, + 0x61, + 0x64, + 0x65, + 0x72, + 0x0, + 0x21, + 0x6d, + 0x61, + 0x69, + 0x6e, + 0x0, + 0x25, + 0x2, + 0x0, + 0x0, + 0x0, + 0x3, + 0x0, + 0x80, + 0x20, + 0x0, + }; + var stream = std.io.fixedBufferStream(in_buffer[0..]); + var trie = Trie.init(gpa); + defer trie.deinit(); + try trie.fromByteStream(&stream); + + var out_buffer = try trie.writeULEB128Mem(); + defer gpa.free(out_buffer); + + testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); } -- cgit v1.2.3 From a579f8ae8d6009d95ef22879bc725a233f838d6f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 8 Dec 2020 17:17:48 +0100 Subject: macho: add generic terminal info nullable struct to a node --- src/link/MachO/Trie.zig | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index cdc6581a06..3877c8b5a9 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -34,6 +34,7 @@ const std = @import("std"); const mem = std.mem; const leb = std.leb; const log = std.log.scoped(.link); +const macho = std.macho; const testing = std.testing; const assert = std.debug.assert; const Allocator = mem.Allocator; @@ -61,10 +62,14 @@ pub const Edge = struct { pub const Node = struct { base: *Trie, - /// Export flags associated with this exported symbol (if any). - export_flags: ?u64 = null, - /// VM address offset wrt to the section this symbol is defined against (if any). - vmaddr_offset: ?u64 = null, + /// Terminal info associated with this node. + /// If this node is not a terminal node, info is null. + terminal_info: ?struct { + /// Export flags associated with this exported symbol. + export_flags: u64, + /// VM address offset wrt to the section this symbol is defined against. + vmaddr_offset: u64, + } = null, /// Offset of this node in the trie output byte stream. trie_offset: ?usize = null, /// List of all edges originating from this node. @@ -125,9 +130,15 @@ pub const Node = struct { var reader = stream.reader(); const node_size = try leb.readULEB128(u64, reader); if (node_size > 0) { - self.export_flags = try leb.readULEB128(u64, reader); - // TODO Parse flags. 
- self.vmaddr_offset = try leb.readULEB128(u64, reader); + const export_flags = try leb.readULEB128(u64, reader); + // TODO Parse special flags. + assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + const vmaddr_offset = try leb.readULEB128(u64, reader); + self.terminal_info = .{ + .export_flags = export_flags, + .vmaddr_offset = vmaddr_offset, + }; } const nedges = try reader.readByte(); self.base.node_count += nedges; @@ -162,13 +173,16 @@ pub const Node = struct { /// In case this is not upheld, this method will panic. fn writeULEB128Mem(self: Node, buffer: *std.ArrayList(u8)) !void { assert(self.trie_offset != null); // You need to call updateOffset first. - if (self.vmaddr_offset) |offset| { + if (self.terminal_info) |info| { // Terminal node info: encode export flags and vmaddr offset of this symbol. var info_buf_len: usize = 0; var info_buf: [@sizeOf(u64) * 2]u8 = undefined; var info_stream = std.io.fixedBufferStream(&info_buf); - try leb.writeULEB128(info_stream.writer(), self.export_flags.?); - try leb.writeULEB128(info_stream.writer(), offset); + // TODO Implement for special flags. + assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + try leb.writeULEB128(info_stream.writer(), info.export_flags); + try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset); // Encode the size of the terminal node info. var size_buf: [@sizeOf(u64)]u8 = undefined; @@ -208,9 +222,9 @@ pub const Node = struct { /// Updates offset of this node in the output byte stream. fn updateOffset(self: *Node, offset: usize) UpdateResult { var node_size: usize = 0; - if (self.vmaddr_offset) |vmaddr| { - node_size += sizeULEB128Mem(self.export_flags.?); - node_size += sizeULEB128Mem(vmaddr); + if (self.terminal_info) |info| { + node_size += sizeULEB128Mem(info.export_flags); + node_size += sizeULEB128Mem(info.vmaddr_offset); node_size += sizeULEB128Mem(node_size); } else { node_size += 1; // 0x0 for non-terminal nodes @@ -263,8 +277,10 @@ pub fn put(self: *Trie, symbol: Symbol) !void { self.root = .{ .base = self }; } const node = try self.root.?.put(symbol.name); - node.vmaddr_offset = symbol.vmaddr_offset; - node.export_flags = symbol.export_flags; + node.terminal_info = .{ + .vmaddr_offset = symbol.vmaddr_offset, + .export_flags = symbol.export_flags, + }; } const FromByteStreamError = error{ -- cgit v1.2.3 From a28340405392b4a0a687e668406a067be1ae5e3c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 9 Dec 2020 11:01:51 +0100 Subject: macho: split writing Trie into finalize and const write --- src/link/MachO.zig | 6 +- src/link/MachO/Trie.zig | 432 +++++++++++++++++++++++++----------------------- 2 files changed, 234 insertions(+), 204 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 153f47c340..c265deabdf 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1810,8 +1810,12 @@ fn writeExportTrie(self: *MachO) !void { }); } - var buffer = try trie.writeULEB128Mem(); + try trie.finalize(); + var buffer = try self.base.allocator.alloc(u8, trie.size); defer self.base.allocator.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + const nwritten = try trie.write(stream.writer()); + assert(nwritten == trie.size); const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; const export_size = @intCast(u32, mem.alignForward(buffer.len, 
@sizeOf(u64))); diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index 3877c8b5a9..b5f2057ff1 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -51,7 +51,7 @@ pub const Edge = struct { label: []u8, fn deinit(self: *Edge, allocator: *Allocator) void { - self.to.deinit(); + self.to.deinit(allocator); allocator.destroy(self.to); allocator.free(self.label); self.from = undefined; @@ -62,6 +62,7 @@ pub const Edge = struct { pub const Node = struct { base: *Trie, + /// Terminal info associated with this node. /// If this node is not a terminal node, info is null. terminal_info: ?struct { @@ -70,82 +71,93 @@ pub const Node = struct { /// VM address offset wrt to the section this symbol is defined against. vmaddr_offset: u64, } = null, + /// Offset of this node in the trie output byte stream. trie_offset: ?usize = null, + /// List of all edges originating from this node. edges: std.ArrayListUnmanaged(Edge) = .{}, - fn deinit(self: *Node) void { + node_dirty: bool = true, + + fn deinit(self: *Node, allocator: *Allocator) void { for (self.edges.items) |*edge| { - edge.deinit(self.base.allocator); + edge.deinit(allocator); } - self.edges.deinit(self.base.allocator); + self.edges.deinit(allocator); } /// Inserts a new node starting from `self`. - fn put(self: *Node, label: []const u8) !*Node { + fn put(self: *Node, allocator: *Allocator, label: []const u8) !*Node { // Check for match with edges from this node. for (self.edges.items) |*edge| { const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; if (match == 0) continue; - if (match == edge.label.len) return edge.to.put(label[match..]); + if (match == edge.label.len) return edge.to.put(allocator, label[match..]); // Found a match, need to splice up nodes. // From: A -> B // To: A -> C -> B - const mid = try self.base.allocator.create(Node); + const mid = try allocator.create(Node); mid.* = .{ .base = self.base }; - var to_label = try self.base.allocator.dupe(u8, edge.label[match..]); - self.base.allocator.free(edge.label); + var to_label = try allocator.dupe(u8, edge.label[match..]); + allocator.free(edge.label); const to_node = edge.to; edge.to = mid; - edge.label = try self.base.allocator.dupe(u8, label[0..match]); + edge.label = try allocator.dupe(u8, label[0..match]); self.base.node_count += 1; - try mid.edges.append(self.base.allocator, .{ + try mid.edges.append(allocator, .{ .from = mid, .to = to_node, .label = to_label, }); - return if (match == label.len) to_node else mid.put(label[match..]); + return if (match == label.len) to_node else mid.put(allocator, label[match..]); } // Add a new node. - const node = try self.base.allocator.create(Node); + const node = try allocator.create(Node); node.* = .{ .base = self.base }; self.base.node_count += 1; - try self.edges.append(self.base.allocator, .{ + try self.edges.append(allocator, .{ .from = self, .to = node, - .label = try self.base.allocator.dupe(u8, label), + .label = try allocator.dupe(u8, label), }); return node; } - fn fromByteStream(self: *Node, stream: anytype) Trie.FromByteStreamError!void { - self.trie_offset = try stream.getPos(); - var reader = stream.reader(); + /// Recursively parses the node from the input byte stream. 
+ fn read(self: *Node, allocator: *Allocator, reader: anytype) Trie.ReadError!void { + self.node_dirty = true; + + self.trie_offset = try reader.context.getPos(); + const node_size = try leb.readULEB128(u64, reader); if (node_size > 0) { const export_flags = try leb.readULEB128(u64, reader); // TODO Parse special flags. assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + const vmaddr_offset = try leb.readULEB128(u64, reader); + self.terminal_info = .{ .export_flags = export_flags, .vmaddr_offset = vmaddr_offset, }; } + const nedges = try reader.readByte(); self.base.node_count += nedges; + var i: usize = 0; while (i < nedges) : (i += 1) { var label = blk: { - var label_buf = std.ArrayList(u8).init(self.base.allocator); + var label_buf = std.ArrayList(u8).init(allocator); while (true) { const next = try reader.readByte(); if (next == @as(u8, 0)) @@ -154,25 +166,32 @@ pub const Node = struct { } break :blk label_buf.toOwnedSlice(); }; + const seek_to = try leb.readULEB128(u64, reader); - const cur_pos = try stream.getPos(); - try stream.seekTo(seek_to); - var node = try self.base.allocator.create(Node); + const cur_pos = try reader.context.getPos(); + try reader.context.seekTo(seek_to); + + const node = try allocator.create(Node); node.* = .{ .base = self.base }; - try node.fromByteStream(stream); - try self.edges.append(self.base.allocator, .{ + + try node.read(allocator, reader); + try self.edges.append(allocator, .{ .from = self, .to = node, .label = label, }); - try stream.seekTo(cur_pos); + try reader.context.seekTo(cur_pos); } } - /// This method should only be called *after* updateOffset has been called! - /// In case this is not upheld, this method will panic. - fn writeULEB128Mem(self: Node, buffer: *std.ArrayList(u8)) !void { - assert(self.trie_offset != null); // You need to call updateOffset first. + /// Writes this node to a byte stream. + /// The children of this node *are* not written to the byte stream + /// recursively. To write all nodes to a byte stream in sequence, + /// iterate over `Trie.ordered_nodes` and call this method on each node. + /// This is one of the requirements of the MachO. + /// Panics if `finalize` was not called before calling this method. + fn write(self: Node, writer: anytype) !void { + assert(!self.node_dirty); if (self.terminal_info) |info| { // Terminal node info: encode export flags and vmaddr offset of this symbol. var info_buf_len: usize = 0; @@ -189,38 +208,35 @@ pub const Node = struct { var size_stream = std.io.fixedBufferStream(&size_buf); try leb.writeULEB128(size_stream.writer(), info_stream.pos); - // Now, write them to the output buffer. - buffer.appendSliceAssumeCapacity(size_buf[0..size_stream.pos]); - buffer.appendSliceAssumeCapacity(info_buf[0..info_stream.pos]); + // Now, write them to the output stream. + try writer.writeAll(size_buf[0..size_stream.pos]); + try writer.writeAll(info_buf[0..info_stream.pos]); } else { // Non-terminal node is delimited by 0 byte. - buffer.appendAssumeCapacity(0); + try writer.writeByte(0); } // Write number of edges (max legal number of edges is 256). - buffer.appendAssumeCapacity(@intCast(u8, self.edges.items.len)); + try writer.writeByte(@intCast(u8, self.edges.items.len)); for (self.edges.items) |edge| { - // Write edges labels. 
- buffer.appendSliceAssumeCapacity(edge.label); - buffer.appendAssumeCapacity(0); - - var buf: [@sizeOf(u64)]u8 = undefined; - var buf_stream = std.io.fixedBufferStream(&buf); - try leb.writeULEB128(buf_stream.writer(), edge.to.trie_offset.?); - buffer.appendSliceAssumeCapacity(buf[0..buf_stream.pos]); + // Write edge label and offset to next node in trie. + try writer.writeAll(edge.label); + try writer.writeByte(0); + try leb.writeULEB128(writer, edge.to.trie_offset.?); } } - const UpdateResult = struct { + const FinalizeResult = struct { /// Current size of this node in bytes. node_size: usize, + /// True if the trie offset of this node in the output byte stream /// would need updating; false otherwise. updated: bool, }; /// Updates offset of this node in the output byte stream. - fn updateOffset(self: *Node, offset: usize) UpdateResult { + fn finalize(self: *Node, offset_in_trie: usize) FinalizeResult { var node_size: usize = 0; if (self.terminal_info) |info| { node_size += sizeULEB128Mem(info.export_flags); @@ -237,8 +253,9 @@ pub const Node = struct { } const trie_offset = self.trie_offset orelse 0; - const updated = offset != trie_offset; - self.trie_offset = offset; + const updated = offset_in_trie != trie_offset; + self.trie_offset = offset_in_trie; + self.node_dirty = false; return .{ .node_size = node_size, .updated = updated }; } @@ -256,15 +273,30 @@ pub const Node = struct { } }; -/// Count of nodes in the trie. -/// The count is updated at every `put` call. -/// The trie always consists of at least a root node, hence -/// the count always starts at 1. -node_count: usize = 1, /// The root node of the trie. -root: ?Node = null, +root: ?*Node = null, + allocator: *Allocator, +/// If you want to access nodes ordered in DFS fashion, +/// you should call `finalize` first since the nodes +/// in this container are not guaranteed to not be stale +/// if more insertions took place after the last `finalize` +/// call. +ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}, + +/// The size of the trie in bytes. +/// This value may be outdated if there were additional +/// insertions performed after `finalize` was called. +/// Call `finalize` before accessing this value to ensure +/// it is up-to-date. +size: usize = 0, + +/// Number of nodes currently in the trie. +node_count: usize = 0, + +trie_dirty: bool = true, + pub fn init(allocator: *Allocator) Trie { return .{ .allocator = allocator }; } @@ -273,76 +305,90 @@ pub fn init(allocator: *Allocator) Trie { /// This operation may change the layout of the trie by splicing edges in /// certain circumstances. pub fn put(self: *Trie, symbol: Symbol) !void { - if (self.root == null) { - self.root = .{ .base = self }; - } - const node = try self.root.?.put(symbol.name); + try self.createRoot(); + const node = try self.root.?.put(self.allocator, symbol.name); node.terminal_info = .{ .vmaddr_offset = symbol.vmaddr_offset, .export_flags = symbol.export_flags, }; + self.trie_dirty = true; } -const FromByteStreamError = error{ - OutOfMemory, - EndOfStream, - Overflow, -}; +/// Finalizes this trie for writing to a byte stream. +/// This step performs multiple passes through the trie ensuring +/// there are no gaps after every `Node` is ULEB128 encoded. +/// Call this method before trying to `write` the trie to a byte stream. +pub fn finalize(self: *Trie) !void { + if (!self.trie_dirty) return; -/// Parse the trie from a byte stream. 
-pub fn fromByteStream(self: *Trie, stream: anytype) FromByteStreamError!void { - if (self.root == null) { - self.root = .{ .base = self }; - } - return self.root.?.fromByteStream(stream); -} + self.ordered_nodes.shrinkRetainingCapacity(0); + try self.ordered_nodes.ensureCapacity(self.allocator, self.node_count); -/// Write the trie to a buffer ULEB128 encoded. -/// Caller owns the memory and needs to free it. -pub fn writeULEB128Mem(self: *Trie) ![]u8 { - var ordered_nodes = try self.nodes(); - defer self.allocator.free(ordered_nodes); + comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) }); + var fifo = Fifo.init(); + try fifo.writeItem(self.root.?); + + while (fifo.readItem()) |next| { + for (next.edges.items) |*edge| { + try fifo.writeItem(edge.to); + } + self.ordered_nodes.appendAssumeCapacity(next); + } - var offset: usize = 0; var more: bool = true; while (more) { - offset = 0; + self.size = 0; more = false; - for (ordered_nodes) |node| { - const res = node.updateOffset(offset); - offset += res.node_size; + for (self.ordered_nodes.items) |node| { + const res = node.finalize(self.size); + self.size += res.node_size; if (res.updated) more = true; } } - var buffer = std.ArrayList(u8).init(self.allocator); - try buffer.ensureCapacity(offset); - for (ordered_nodes) |node| { - try node.writeULEB128Mem(&buffer); - } - return buffer.toOwnedSlice(); + self.trie_dirty = false; } -pub fn nodes(self: *Trie) ![]*Node { - var ordered_nodes = std.ArrayList(*Node).init(self.allocator); - try ordered_nodes.ensureCapacity(self.node_count); +const ReadError = error{ + OutOfMemory, + EndOfStream, + Overflow, +}; - comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) }); - var fifo = Fifo.init(); - try fifo.writeItem(&self.root.?); +/// Parse the trie from a byte stream. +pub fn read(self: *Trie, reader: anytype) ReadError!void { + try self.createRoot(); + return self.root.?.read(self.allocator, reader); +} - while (fifo.readItem()) |next| { - for (next.edges.items) |*edge| { - try fifo.writeItem(edge.to); - } - ordered_nodes.appendAssumeCapacity(next); +/// Write the trie to a byte stream. +/// Caller owns the memory and needs to free it. +/// Panics if the trie was not finalized using `finalize` +/// before calling this method. 
+pub fn write(self: Trie, writer: anytype) !usize { + assert(!self.trie_dirty); + var counting_writer = std.io.countingWriter(writer); + for (self.ordered_nodes.items) |node| { + try node.write(counting_writer.writer()); } - - return ordered_nodes.toOwnedSlice(); + return counting_writer.bytes_written; } pub fn deinit(self: *Trie) void { - self.root.?.deinit(); + if (self.root) |root| { + root.deinit(self.allocator); + self.allocator.destroy(root); + } + self.ordered_nodes.deinit(self.allocator); +} + +fn createRoot(self: *Trie) !void { + if (self.root == null) { + const root = try self.allocator.create(Node); + root.* = .{ .base = self }; + self.root = root; + self.node_count += 1; + } } test "Trie node count" { @@ -350,7 +396,8 @@ test "Trie node count" { var trie = Trie.init(gpa); defer trie.deinit(); - testing.expectEqual(trie.node_count, 1); + testing.expectEqual(trie.node_count, 0); + testing.expect(trie.root == null); try trie.put(.{ .name = "_main", @@ -439,7 +486,7 @@ test "Trie basic" { } } -test "Trie.writeULEB128Mem" { +test "write Trie to a byte stream" { var gpa = testing.allocator; var trie = Trie.init(gpa); defer trie.deinit(); @@ -455,112 +502,91 @@ test "Trie.writeULEB128Mem" { .export_flags = 0, }); - var buffer = try trie.writeULEB128Mem(); - defer gpa.free(buffer); + try trie.finalize(); + try trie.finalize(); // Finalizing mulitple times is a nop subsequently unless we add new nodes. const exp_buffer = [_]u8{ - 0x0, - 0x1, - 0x5f, - 0x0, - 0x5, - 0x0, - 0x2, - 0x5f, - 0x6d, - 0x68, - 0x5f, - 0x65, - 0x78, - 0x65, - 0x63, - 0x75, - 0x74, - 0x65, - 0x5f, - 0x68, - 0x65, - 0x61, - 0x64, - 0x65, - 0x72, - 0x0, - 0x21, - 0x6d, - 0x61, - 0x69, - 0x6e, - 0x0, - 0x25, - 0x2, - 0x0, - 0x0, - 0x0, - 0x3, - 0x0, - 0x80, - 0x20, - 0x0, + 0x0, 0x1, // node root + 0x5f, 0x0, 0x5, // edge '_' + 0x0, 0x2, // non-terminal node + 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' + 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' + 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' + 0x2, 0x0, 0x0, 0x0, // terminal node + 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node }; - testing.expect(buffer.len == exp_buffer.len); - testing.expect(mem.eql(u8, buffer, exp_buffer[0..])); + var buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + { + const nwritten = try trie.write(stream.writer()); + testing.expect(nwritten == trie.size); + testing.expect(mem.eql(u8, buffer, exp_buffer[0..])); + } + { + // Writing finalized trie again should yield the same result. 
+ try stream.seekTo(0); + const nwritten = try trie.write(stream.writer()); + testing.expect(nwritten == trie.size); + testing.expect(mem.eql(u8, buffer, exp_buffer[0..])); + } } -test "parse Trie from byte stream" { - var gpa = testing.allocator; - - const in_buffer = [_]u8{ - 0x0, - 0x1, - 0x5f, - 0x0, - 0x5, - 0x0, - 0x2, - 0x5f, - 0x6d, - 0x68, - 0x5f, - 0x65, - 0x78, - 0x65, - 0x63, - 0x75, - 0x74, - 0x65, - 0x5f, - 0x68, - 0x65, - 0x61, - 0x64, - 0x65, - 0x72, - 0x0, - 0x21, - 0x6d, - 0x61, - 0x69, - 0x6e, - 0x0, - 0x25, - 0x2, - 0x0, - 0x0, - 0x0, - 0x3, - 0x0, - 0x80, - 0x20, - 0x0, - }; - var stream = std.io.fixedBufferStream(in_buffer[0..]); - var trie = Trie.init(gpa); - defer trie.deinit(); - try trie.fromByteStream(&stream); - - var out_buffer = try trie.writeULEB128Mem(); - defer gpa.free(out_buffer); - - testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); -} +// test "parse Trie from byte stream" { +// var gpa = testing.allocator; + +// const in_buffer = [_]u8{ +// 0x0, +// 0x1, +// 0x5f, +// 0x0, +// 0x5, +// 0x0, +// 0x2, +// 0x5f, +// 0x6d, +// 0x68, +// 0x5f, +// 0x65, +// 0x78, +// 0x65, +// 0x63, +// 0x75, +// 0x74, +// 0x65, +// 0x5f, +// 0x68, +// 0x65, +// 0x61, +// 0x64, +// 0x65, +// 0x72, +// 0x0, +// 0x21, +// 0x6d, +// 0x61, +// 0x69, +// 0x6e, +// 0x0, +// 0x25, +// 0x2, +// 0x0, +// 0x0, +// 0x0, +// 0x3, +// 0x0, +// 0x80, +// 0x20, +// 0x0, +// }; +// var stream = std.io.fixedBufferStream(in_buffer[0..]); +// var trie = Trie.init(gpa); +// defer trie.deinit(); +// try trie.fromByteStream(&stream); + +// var out_buffer = try trie.writeULEB128Mem(); +// defer gpa.free(out_buffer); + +// testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); +// } -- cgit v1.2.3 From 601600dec981e41d43bb72113d9284cbb9e1d9ae Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 9 Dec 2020 11:05:52 +0100 Subject: macho: parsing Trie now takes a reader and returns bytes read --- src/link/MachO.zig | 2 +- src/link/MachO/Trie.zig | 164 +++++++++++++++++++++++------------------------- 2 files changed, 79 insertions(+), 87 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c265deabdf..120326da96 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1806,7 +1806,7 @@ fn writeExportTrie(self: *MachO) !void { try trie.put(.{ .name = name, .vmaddr_offset = symbol.n_value - text_segment.inner.vmaddr, - .export_flags = 0, // TODO workout creation of export flags + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, }); } diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index b5f2057ff1..0016ff329c 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -39,27 +39,6 @@ const testing = std.testing; const assert = std.debug.assert; const Allocator = mem.Allocator; -pub const Symbol = struct { - name: []const u8, - vmaddr_offset: u64, - export_flags: u64, -}; - -pub const Edge = struct { - from: *Node, - to: *Node, - label: []u8, - - fn deinit(self: *Edge, allocator: *Allocator) void { - self.to.deinit(allocator); - allocator.destroy(self.to); - allocator.free(self.label); - self.from = undefined; - self.to = undefined; - self.label = undefined; - } -}; - pub const Node = struct { base: *Trie, @@ -80,6 +59,22 @@ pub const Node = struct { node_dirty: bool = true, + /// Edge connecting to nodes in the trie. 
+ pub const Edge = struct { + from: *Node, + to: *Node, + label: []u8, + + fn deinit(self: *Edge, allocator: *Allocator) void { + self.to.deinit(allocator); + allocator.destroy(self.to); + allocator.free(self.label); + self.from = undefined; + self.to = undefined; + self.label = undefined; + } + }; + fn deinit(self: *Node, allocator: *Allocator) void { for (self.edges.items) |*edge| { edge.deinit(allocator); @@ -131,10 +126,12 @@ pub const Node = struct { } /// Recursively parses the node from the input byte stream. - fn read(self: *Node, allocator: *Allocator, reader: anytype) Trie.ReadError!void { + fn read(self: *Node, allocator: *Allocator, reader: anytype) Trie.ReadError!usize { self.node_dirty = true; + const trie_offset = try reader.context.getPos(); + self.trie_offset = trie_offset; - self.trie_offset = try reader.context.getPos(); + var nread: usize = 0; const node_size = try leb.readULEB128(u64, reader); if (node_size > 0) { @@ -154,9 +151,13 @@ pub const Node = struct { const nedges = try reader.readByte(); self.base.node_count += nedges; + nread += (try reader.context.getPos()) - trie_offset; + var i: usize = 0; while (i < nedges) : (i += 1) { - var label = blk: { + const edge_start_pos = try reader.context.getPos(); + + const label = blk: { var label_buf = std.ArrayList(u8).init(allocator); while (true) { const next = try reader.readByte(); @@ -168,20 +169,24 @@ pub const Node = struct { }; const seek_to = try leb.readULEB128(u64, reader); - const cur_pos = try reader.context.getPos(); + const return_pos = try reader.context.getPos(); + + nread += return_pos - edge_start_pos; try reader.context.seekTo(seek_to); const node = try allocator.create(Node); node.* = .{ .base = self.base }; - try node.read(allocator, reader); + nread += try node.read(allocator, reader); try self.edges.append(allocator, .{ .from = self, .to = node, .label = label, }); - try reader.context.seekTo(cur_pos); + try reader.context.seekTo(return_pos); } + + return nread; } /// Writes this node to a byte stream. @@ -301,10 +306,23 @@ pub fn init(allocator: *Allocator) Trie { return .{ .allocator = allocator }; } +/// Export symbol that is to be placed in the trie. +pub const ExportSymbol = struct { + /// Name of the symbol. + name: []const u8, + + /// Offset of this symbol's virtual memory address from the beginning + /// of the __TEXT segment. + vmaddr_offset: u64, + + /// Export flags of this exported symbol. + export_flags: u64, +}; + /// Insert a symbol into the trie, updating the prefixes in the process. /// This operation may change the layout of the trie by splicing edges in /// certain circumstances. -pub fn put(self: *Trie, symbol: Symbol) !void { +pub fn put(self: *Trie, symbol: ExportSymbol) !void { try self.createRoot(); const node = try self.root.?.put(self.allocator, symbol.name); node.terminal_info = .{ @@ -356,7 +374,7 @@ const ReadError = error{ }; /// Parse the trie from a byte stream. 
-pub fn read(self: *Trie, reader: anytype) ReadError!void { +pub fn read(self: *Trie, reader: anytype) ReadError!usize { try self.createRoot(); return self.root.?.read(self.allocator, reader); } @@ -533,60 +551,34 @@ test "write Trie to a byte stream" { } } -// test "parse Trie from byte stream" { -// var gpa = testing.allocator; - -// const in_buffer = [_]u8{ -// 0x0, -// 0x1, -// 0x5f, -// 0x0, -// 0x5, -// 0x0, -// 0x2, -// 0x5f, -// 0x6d, -// 0x68, -// 0x5f, -// 0x65, -// 0x78, -// 0x65, -// 0x63, -// 0x75, -// 0x74, -// 0x65, -// 0x5f, -// 0x68, -// 0x65, -// 0x61, -// 0x64, -// 0x65, -// 0x72, -// 0x0, -// 0x21, -// 0x6d, -// 0x61, -// 0x69, -// 0x6e, -// 0x0, -// 0x25, -// 0x2, -// 0x0, -// 0x0, -// 0x0, -// 0x3, -// 0x0, -// 0x80, -// 0x20, -// 0x0, -// }; -// var stream = std.io.fixedBufferStream(in_buffer[0..]); -// var trie = Trie.init(gpa); -// defer trie.deinit(); -// try trie.fromByteStream(&stream); - -// var out_buffer = try trie.writeULEB128Mem(); -// defer gpa.free(out_buffer); - -// testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); -// } +test "parse Trie from byte stream" { + var gpa = testing.allocator; + + const in_buffer = [_]u8{ + 0x0, 0x1, // node root + 0x5f, 0x0, 0x5, // edge '_' + 0x0, 0x2, // non-terminal node + 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' + 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' + 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' + 0x2, 0x0, 0x0, 0x0, // terminal node + 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node + }; + + var in_stream = std.io.fixedBufferStream(in_buffer[0..]); + var trie = Trie.init(gpa); + defer trie.deinit(); + const nread = try trie.read(in_stream.reader()); + + testing.expect(nread == in_buffer.len); + + try trie.finalize(); + + var out_buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(out_buffer); + var out_stream = std.io.fixedBufferStream(out_buffer); + const nwritten = try trie.write(out_stream.writer()); + + testing.expect(nwritten == trie.size); + testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); +} -- cgit v1.2.3 From 184c0f3c4e140d3f0971bac184f0abce00d8d336 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 9 Dec 2020 11:38:11 +0100 Subject: stage2+macho: write code signature only when targeting aarch64 --- src/link/MachO.zig | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 120326da96..1466f75486 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -301,7 +301,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { const tracy = trace(@src()); defer tracy.end(); - switch (self.base.options.output_mode) { + const output_mode = self.base.options.output_mode; + const target = self.base.options.target; + + switch (output_mode) { .Exe => { if (self.entry_addr) |addr| { // Update LC_MAIN with entry offset. @@ -312,12 +315,15 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { try self.writeExportTrie(); try self.writeSymbolTable(); try self.writeStringTable(); - // Preallocate space for the code signature. - // We need to do this at this stage so that we have the load commands with proper values - // written out to the file. - // The most important here is to have the correct vm and filesize of the __LINKEDIT segment - // where the code signature goes into. 
- try self.writeCodeSignaturePadding(); + + if (target.cpu.arch == .aarch64) { + // Preallocate space for the code signature. + // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. + try self.writeCodeSignaturePadding(); + } }, .Obj => {}, .Lib => return error.TODOImplementWritingLibFiles, @@ -339,9 +345,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { assert(!self.cmd_table_dirty); - switch (self.base.options.output_mode) { - .Exe, .Lib => try self.writeCodeSignature(), // code signing always comes last - else => {}, + if (target.cpu.arch == .aarch64) { + switch (output_mode) { + .Exe, .Lib => try self.writeCodeSignature(), // code signing always comes last + else => {}, + } } } -- cgit v1.2.3 From 44e2f210bb3fee78d339bc5c75822e0311600f70 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 9 Dec 2020 11:43:04 +0100 Subject: lld+macho: clean up error message when padding insufficient --- src/link/MachO.zig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 1466f75486..6fd0561cb1 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -760,13 +760,15 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_section = text_segment.sections.items[self.text_section_index.?]; const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64); - const needed_size = @sizeOf(macho.linkedit_data_command); + const needed_size = @sizeOf(macho.linkedit_data_command) * alloc_num / alloc_den; + if (needed_size + after_last_cmd_offset > text_section.offset) { - std.log.err("Unable to extend padding between load commands and start of __text section.", .{}); - std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size * alloc_num / alloc_den}); - std.log.err("fall back to the system linker.", .{}); + std.log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); + std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); + std.log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); return error.NotEnoughPadding; } + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; // TODO This is clunky. self.linkedit_segment_next_offset = @intCast(u32, mem.alignForwardGeneric(u64, linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize, @sizeOf(u64))); -- cgit v1.2.3
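For reference, a minimal sketch of how the reworked Trie API from this series fits together end to end: build the trie with put, call finalize to assign node offsets and compute the encoded size, write the ULEB128-encoded bytes through any writer, and optionally read them back. The import path, allocator setup, and symbol values below are illustrative only, not taken from the patches.

const std = @import("std");
const mem = std.mem;
const macho = std.macho;
const testing = std.testing;
const Trie = @import("Trie.zig"); // illustrative path; the real file lives at src/link/MachO/Trie.zig

test "export trie round trip (sketch)" {
    var gpa = testing.allocator;

    var trie = Trie.init(gpa);
    defer trie.deinit();

    // Symbol names and vmaddr offsets are made up for illustration.
    try trie.put(.{
        .name = "__mh_execute_header",
        .vmaddr_offset = 0,
        .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
    });
    try trie.put(.{
        .name = "_main",
        .vmaddr_offset = 0x1000,
        .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
    });

    // finalize() orders the nodes, fixes their trie offsets, and computes trie.size;
    // write() then emits the ULEB128-encoded trie and reports the bytes written.
    try trie.finalize();
    var buffer = try gpa.alloc(u8, trie.size);
    defer gpa.free(buffer);
    var stream = std.io.fixedBufferStream(buffer);
    const nwritten = try trie.write(stream.writer());
    testing.expect(nwritten == trie.size);

    // Round trip: parse the trie back from the bytes just written.
    var in_stream = std.io.fixedBufferStream(buffer);
    var parsed = Trie.init(gpa);
    defer parsed.deinit();
    const nread = try parsed.read(in_stream.reader());
    testing.expect(nread == trie.size);
}

This mirrors the sequence writeExportTrie in MachO.zig follows after the final commit: finalize the trie, allocate trie.size bytes, and write through a fixedBufferStream before copying the buffer into the __LINKEDIT segment.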