From 4c3e6c5bff967388dddc7ec352017c7b712d9f06 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 8 Dec 2020 16:52:50 +0100 Subject: macho: cleanup export trie generation and parsing Now, ExportTrie is becoming usable for larger linking contexts such as linking in multiple object files, or relinking dylibs, etc. --- src/link/MachO.zig | 66 ++++++++--- src/link/MachO/Trie.zig | 303 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 254 insertions(+), 115 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a2925b3b6b..153f47c340 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -754,13 +754,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64); const needed_size = @sizeOf(macho.linkedit_data_command); if (needed_size + after_last_cmd_offset > text_section.offset) { - // TODO We are in the position to be able to increase the padding by moving all sections - // by the required offset, but this requires a little bit more thinking and bookkeeping. - // For now, return an error informing the user of the problem. - log.err("Not enough padding between load commands and start of __text section:\n", .{}); - log.err("Offset after last load command: 0x{x}\n", .{after_last_cmd_offset}); - log.err("Beginning of __text section: 0x{x}\n", .{text_section.offset}); - log.err("Needed size: 0x{x}\n", .{needed_size}); + std.log.err("Unable to extend padding between load commands and start of __text section.", .{}); + std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size * alloc_num / alloc_den}); + std.log.err("fall back to the system linker.", .{}); return error.NotEnoughPadding; } const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; @@ -1799,38 +1795,36 @@ fn writeCodeSignature(self: *MachO) !void { fn writeExportTrie(self: *MachO) !void { if (self.global_symbols.items.len == 0) return; - var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); + var trie = Trie.init(self.base.allocator); + defer trie.deinit(); const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; for (self.global_symbols.items) |symbol| { // TODO figure out if we should put all global symbols into the export trie const name = self.getString(symbol.n_strx); assert(symbol.n_value >= text_segment.inner.vmaddr); - try trie.put(self.base.allocator, .{ + try trie.put(.{ .name = name, .vmaddr_offset = symbol.n_value - text_segment.inner.vmaddr, .export_flags = 0, // TODO workout creation of export flags }); } - var buffer: std.ArrayListUnmanaged(u8) = .{}; - defer buffer.deinit(self.base.allocator); - - try trie.writeULEB128Mem(self.base.allocator, &buffer); + var buffer = try trie.writeULEB128Mem(); + defer self.base.allocator.free(buffer); const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const export_size = @intCast(u32, mem.alignForward(buffer.items.len, @sizeOf(u64))); + const export_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); dyld_info.export_off = self.linkedit_segment_next_offset.?; dyld_info.export_size = export_size; log.debug("writing export trie from 0x{x} to 0x{x}\n", .{ dyld_info.export_off, dyld_info.export_off + export_size }); - if (export_size > buffer.items.len) { + if (export_size > buffer.len) { // Pad out to align(8). 
try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.export_off + export_size); } - try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off); + try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); self.linkedit_segment_next_offset = dyld_info.export_off + dyld_info.export_size; // Advance size of __LINKEDIT segment @@ -1917,7 +1911,9 @@ fn parseFromFile(self: *MachO, file: fs.File) !void { switch (cmd.cmd()) { macho.LC_SEGMENT_64 => { const x = cmd.Segment; - if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) { + if (isSegmentOrSection(&x.inner.segname, "__PAGEZERO")) { + self.pagezero_segment_cmd_index = i; + } else if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) { self.linkedit_segment_cmd_index = i; } else if (isSegmentOrSection(&x.inner.segname, "__TEXT")) { self.text_segment_cmd_index = i; @@ -1926,16 +1922,48 @@ fn parseFromFile(self: *MachO, file: fs.File) !void { self.text_section_index = @intCast(u16, j); } } + } else if (isSegmentOrSection(&x.inner.segname, "__DATA")) { + self.data_segment_cmd_index = i; } }, + macho.LC_DYLD_INFO_ONLY => { + self.dyld_info_cmd_index = i; + }, macho.LC_SYMTAB => { self.symtab_cmd_index = i; }, + macho.LC_DYSYMTAB => { + self.dysymtab_cmd_index = i; + }, + macho.LC_LOAD_DYLINKER => { + self.dylinker_cmd_index = i; + }, + macho.LC_VERSION_MIN_MACOSX, macho.LC_VERSION_MIN_IPHONEOS, macho.LC_VERSION_MIN_WATCHOS, macho.LC_VERSION_MIN_TVOS => { + self.version_min_cmd_index = i; + }, + macho.LC_SOURCE_VERSION => { + self.source_version_cmd_index = i; + }, + macho.LC_MAIN => { + self.main_cmd_index = i; + }, + macho.LC_LOAD_DYLIB => { + self.libsystem_cmd_index = i; // TODO This is incorrect, but we'll fixup later. + }, + macho.LC_FUNCTION_STARTS => { + self.function_starts_cmd_index = i; + }, + macho.LC_DATA_IN_CODE => { + self.data_in_code_cmd_index = i; + }, macho.LC_CODE_SIGNATURE => { self.code_signature_cmd_index = i; }, // TODO populate more MachO fields - else => {}, + else => { + std.log.err("Unknown load command detected: 0x{x}.", .{cmd.cmd()}); + return error.UnknownLoadCommand; + }, } self.load_commands.appendAssumeCapacity(cmd); } diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index 34ce4e99b9..cdc6581a06 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -44,20 +44,23 @@ pub const Symbol = struct { export_flags: u64, }; -const Edge = struct { +pub const Edge = struct { from: *Node, to: *Node, - label: []const u8, + label: []u8, - fn deinit(self: *Edge, alloc: *Allocator) void { - self.to.deinit(alloc); - alloc.destroy(self.to); + fn deinit(self: *Edge, allocator: *Allocator) void { + self.to.deinit(); + allocator.destroy(self.to); + allocator.free(self.label); self.from = undefined; self.to = undefined; + self.label = undefined; } }; -const Node = struct { +pub const Node = struct { + base: *Trie, /// Export flags associated with this exported symbol (if any). export_flags: ?u64 = null, /// VM address offset wrt to the section this symbol is defined against (if any). @@ -67,73 +70,97 @@ const Node = struct { /// List of all edges originating from this node. edges: std.ArrayListUnmanaged(Edge) = .{}, - fn deinit(self: *Node, alloc: *Allocator) void { + fn deinit(self: *Node) void { for (self.edges.items) |*edge| { - edge.deinit(alloc); + edge.deinit(self.base.allocator); } - self.edges.deinit(alloc); + self.edges.deinit(self.base.allocator); } - const PutResult = struct { - /// Node reached at this stage of `put` op. 
- node: *Node, - /// Count of newly inserted nodes at this stage of `put` op. - node_count: usize, - }; - /// Inserts a new node starting from `self`. - fn put(self: *Node, alloc: *Allocator, label: []const u8, node_count: usize) !PutResult { - var curr_node_count = node_count; + fn put(self: *Node, label: []const u8) !*Node { // Check for match with edges from this node. for (self.edges.items) |*edge| { - const match = mem.indexOfDiff(u8, edge.label, label) orelse return PutResult{ - .node = edge.to, - .node_count = curr_node_count, - }; + const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; if (match == 0) continue; - if (match == edge.label.len) return edge.to.put(alloc, label[match..], curr_node_count); + if (match == edge.label.len) return edge.to.put(label[match..]); // Found a match, need to splice up nodes. // From: A -> B // To: A -> C -> B - const mid = try alloc.create(Node); - mid.* = .{}; - const to_label = edge.label; + const mid = try self.base.allocator.create(Node); + mid.* = .{ .base = self.base }; + var to_label = try self.base.allocator.dupe(u8, edge.label[match..]); + self.base.allocator.free(edge.label); const to_node = edge.to; edge.to = mid; - edge.label = label[0..match]; - curr_node_count += 1; + edge.label = try self.base.allocator.dupe(u8, label[0..match]); + self.base.node_count += 1; - try mid.edges.append(alloc, .{ + try mid.edges.append(self.base.allocator, .{ .from = mid, .to = to_node, - .label = to_label[match..], + .label = to_label, }); - if (match == label.len) { - return PutResult{ .node = to_node, .node_count = curr_node_count }; - } else { - return mid.put(alloc, label[match..], curr_node_count); - } + return if (match == label.len) to_node else mid.put(label[match..]); } // Add a new node. - const node = try alloc.create(Node); - node.* = .{}; - curr_node_count += 1; + const node = try self.base.allocator.create(Node); + node.* = .{ .base = self.base }; + self.base.node_count += 1; - try self.edges.append(alloc, .{ + try self.edges.append(self.base.allocator, .{ .from = self, .to = node, - .label = label, + .label = try self.base.allocator.dupe(u8, label), }); - return PutResult{ .node = node, .node_count = curr_node_count }; + return node; + } + + fn fromByteStream(self: *Node, stream: anytype) Trie.FromByteStreamError!void { + self.trie_offset = try stream.getPos(); + var reader = stream.reader(); + const node_size = try leb.readULEB128(u64, reader); + if (node_size > 0) { + self.export_flags = try leb.readULEB128(u64, reader); + // TODO Parse flags. + self.vmaddr_offset = try leb.readULEB128(u64, reader); + } + const nedges = try reader.readByte(); + self.base.node_count += nedges; + var i: usize = 0; + while (i < nedges) : (i += 1) { + var label = blk: { + var label_buf = std.ArrayList(u8).init(self.base.allocator); + while (true) { + const next = try reader.readByte(); + if (next == @as(u8, 0)) + break; + try label_buf.append(next); + } + break :blk label_buf.toOwnedSlice(); + }; + const seek_to = try leb.readULEB128(u64, reader); + const cur_pos = try stream.getPos(); + try stream.seekTo(seek_to); + var node = try self.base.allocator.create(Node); + node.* = .{ .base = self.base }; + try node.fromByteStream(stream); + try self.edges.append(self.base.allocator, .{ + .from = self, + .to = node, + .label = label, + }); + try stream.seekTo(cur_pos); + } } /// This method should only be called *after* updateOffset has been called! /// In case this is not upheld, this method will panic. 
- fn writeULEB128Mem(self: Node, buffer: *std.ArrayListUnmanaged(u8)) !void { + fn writeULEB128Mem(self: Node, buffer: *std.ArrayList(u8)) !void { assert(self.trie_offset != null); // You need to call updateOffset first. if (self.vmaddr_offset) |offset| { // Terminal node info: encode export flags and vmaddr offset of this symbol. @@ -221,64 +248,95 @@ const Node = struct { /// the count always starts at 1. node_count: usize = 1, /// The root node of the trie. -root: Node = .{}, +root: ?Node = null, +allocator: *Allocator, + +pub fn init(allocator: *Allocator) Trie { + return .{ .allocator = allocator }; +} /// Insert a symbol into the trie, updating the prefixes in the process. /// This operation may change the layout of the trie by splicing edges in /// certain circumstances. -pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void { - const res = try self.root.put(alloc, symbol.name, 0); - self.node_count += res.node_count; - res.node.vmaddr_offset = symbol.vmaddr_offset; - res.node.export_flags = symbol.export_flags; +pub fn put(self: *Trie, symbol: Symbol) !void { + if (self.root == null) { + self.root = .{ .base = self }; + } + const node = try self.root.?.put(symbol.name); + node.vmaddr_offset = symbol.vmaddr_offset; + node.export_flags = symbol.export_flags; } -/// Write the trie to a buffer ULEB128 encoded. -pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void { - var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}; - defer ordered_nodes.deinit(alloc); +const FromByteStreamError = error{ + OutOfMemory, + EndOfStream, + Overflow, +}; - try ordered_nodes.ensureCapacity(alloc, self.node_count); - walkInOrder(&self.root, &ordered_nodes); +/// Parse the trie from a byte stream. +pub fn fromByteStream(self: *Trie, stream: anytype) FromByteStreamError!void { + if (self.root == null) { + self.root = .{ .base = self }; + } + return self.root.?.fromByteStream(stream); +} + +/// Write the trie to a buffer ULEB128 encoded. +/// Caller owns the memory and needs to free it. +pub fn writeULEB128Mem(self: *Trie) ![]u8 { + var ordered_nodes = try self.nodes(); + defer self.allocator.free(ordered_nodes); var offset: usize = 0; var more: bool = true; while (more) { offset = 0; more = false; - for (ordered_nodes.items) |node| { + for (ordered_nodes) |node| { const res = node.updateOffset(offset); offset += res.node_size; if (res.updated) more = true; } } - try buffer.ensureCapacity(alloc, buffer.items.len + offset); - for (ordered_nodes.items) |node| { - try node.writeULEB128Mem(buffer); + var buffer = std.ArrayList(u8).init(self.allocator); + try buffer.ensureCapacity(offset); + for (ordered_nodes) |node| { + try node.writeULEB128Mem(&buffer); } + return buffer.toOwnedSlice(); } -/// Walks the trie in DFS order gathering all nodes into a linear stream of nodes. 
-fn walkInOrder(node: *Node, list: *std.ArrayListUnmanaged(*Node)) void { - list.appendAssumeCapacity(node); - for (node.edges.items) |*edge| { - walkInOrder(edge.to, list); +pub fn nodes(self: *Trie) ![]*Node { + var ordered_nodes = std.ArrayList(*Node).init(self.allocator); + try ordered_nodes.ensureCapacity(self.node_count); + + comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) }); + var fifo = Fifo.init(); + try fifo.writeItem(&self.root.?); + + while (fifo.readItem()) |next| { + for (next.edges.items) |*edge| { + try fifo.writeItem(edge.to); + } + ordered_nodes.appendAssumeCapacity(next); } + + return ordered_nodes.toOwnedSlice(); } -pub fn deinit(self: *Trie, alloc: *Allocator) void { - self.root.deinit(alloc); +pub fn deinit(self: *Trie) void { + self.root.?.deinit(); } test "Trie node count" { var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); + var trie = Trie.init(gpa); + defer trie.deinit(); testing.expectEqual(trie.node_count, 1); - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0, .export_flags = 0, @@ -286,14 +344,14 @@ test "Trie node count" { testing.expectEqual(trie.node_count, 2); // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0, .export_flags = 0, }); testing.expectEqual(trie.node_count, 2); - try trie.put(gpa, .{ + try trie.put(.{ .name = "__mh_execute_header", .vmaddr_offset = 0x1000, .export_flags = 0, @@ -301,13 +359,13 @@ test "Trie node count" { testing.expectEqual(trie.node_count, 4); // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ + try trie.put(.{ .name = "__mh_execute_header", .vmaddr_offset = 0x1000, .export_flags = 0, }); testing.expectEqual(trie.node_count, 4); - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0, .export_flags = 0, @@ -317,31 +375,28 @@ test "Trie node count" { test "Trie basic" { var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - - // root - testing.expect(trie.root.edges.items.len == 0); + var trie = Trie.init(gpa); + defer trie.deinit(); // root --- _st ---> node - try trie.put(gpa, .{ + try trie.put(.{ .name = "_st", .vmaddr_offset = 0, .export_flags = 0, }); - testing.expect(trie.root.edges.items.len == 1); - testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st")); + testing.expect(trie.root.?.edges.items.len == 1); + testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st")); { // root --- _st ---> node --- art ---> node - try trie.put(gpa, .{ + try trie.put(.{ .name = "_start", .vmaddr_offset = 0, .export_flags = 0, }); - testing.expect(trie.root.edges.items.len == 1); + testing.expect(trie.root.?.edges.items.len == 1); - const nextEdge = &trie.root.edges.items[0]; + const nextEdge = &trie.root.?.edges.items[0]; testing.expect(mem.eql(u8, nextEdge.label, "_st")); testing.expect(nextEdge.to.edges.items.len == 1); testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); @@ -350,14 +405,14 @@ test "Trie basic" { // root --- _ ---> node --- st ---> node --- art ---> node // | // | --- main ---> node - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0, .export_flags = 0, }); - testing.expect(trie.root.edges.items.len == 1); + testing.expect(trie.root.?.edges.items.len == 1); - const nextEdge = &trie.root.edges.items[0]; + const nextEdge = &trie.root.?.edges.items[0]; testing.expect(mem.eql(u8, nextEdge.label, "_")); 
testing.expect(nextEdge.to.edges.items.len == 2); testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); @@ -370,24 +425,22 @@ test "Trie basic" { test "Trie.writeULEB128Mem" { var gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); + var trie = Trie.init(gpa); + defer trie.deinit(); - try trie.put(gpa, .{ + try trie.put(.{ .name = "__mh_execute_header", .vmaddr_offset = 0, .export_flags = 0, }); - try trie.put(gpa, .{ + try trie.put(.{ .name = "_main", .vmaddr_offset = 0x1000, .export_flags = 0, }); - var buffer: std.ArrayListUnmanaged(u8) = .{}; - defer buffer.deinit(gpa); - - try trie.writeULEB128Mem(gpa, &buffer); + var buffer = try trie.writeULEB128Mem(); + defer gpa.free(buffer); const exp_buffer = [_]u8{ 0x0, @@ -434,6 +487,64 @@ test "Trie.writeULEB128Mem" { 0x0, }; - testing.expect(buffer.items.len == exp_buffer.len); - testing.expect(mem.eql(u8, buffer.items, exp_buffer[0..])); + testing.expect(buffer.len == exp_buffer.len); + testing.expect(mem.eql(u8, buffer, exp_buffer[0..])); +} + +test "parse Trie from byte stream" { + var gpa = testing.allocator; + + const in_buffer = [_]u8{ + 0x0, + 0x1, + 0x5f, + 0x0, + 0x5, + 0x0, + 0x2, + 0x5f, + 0x6d, + 0x68, + 0x5f, + 0x65, + 0x78, + 0x65, + 0x63, + 0x75, + 0x74, + 0x65, + 0x5f, + 0x68, + 0x65, + 0x61, + 0x64, + 0x65, + 0x72, + 0x0, + 0x21, + 0x6d, + 0x61, + 0x69, + 0x6e, + 0x0, + 0x25, + 0x2, + 0x0, + 0x0, + 0x0, + 0x3, + 0x0, + 0x80, + 0x20, + 0x0, + }; + var stream = std.io.fixedBufferStream(in_buffer[0..]); + var trie = Trie.init(gpa); + defer trie.deinit(); + try trie.fromByteStream(&stream); + + var out_buffer = try trie.writeULEB128Mem(); + defer gpa.free(out_buffer); + + testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); } -- cgit v1.2.3 From a579f8ae8d6009d95ef22879bc725a233f838d6f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 8 Dec 2020 17:17:48 +0100 Subject: macho: add generic terminal info nullable struct to a node --- src/link/MachO/Trie.zig | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index cdc6581a06..3877c8b5a9 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -34,6 +34,7 @@ const std = @import("std"); const mem = std.mem; const leb = std.leb; const log = std.log.scoped(.link); +const macho = std.macho; const testing = std.testing; const assert = std.debug.assert; const Allocator = mem.Allocator; @@ -61,10 +62,14 @@ pub const Edge = struct { pub const Node = struct { base: *Trie, - /// Export flags associated with this exported symbol (if any). - export_flags: ?u64 = null, - /// VM address offset wrt to the section this symbol is defined against (if any). - vmaddr_offset: ?u64 = null, + /// Terminal info associated with this node. + /// If this node is not a terminal node, info is null. + terminal_info: ?struct { + /// Export flags associated with this exported symbol. + export_flags: u64, + /// VM address offset wrt to the section this symbol is defined against. + vmaddr_offset: u64, + } = null, /// Offset of this node in the trie output byte stream. trie_offset: ?usize = null, /// List of all edges originating from this node. @@ -125,9 +130,15 @@ pub const Node = struct { var reader = stream.reader(); const node_size = try leb.readULEB128(u64, reader); if (node_size > 0) { - self.export_flags = try leb.readULEB128(u64, reader); - // TODO Parse flags. 
- self.vmaddr_offset = try leb.readULEB128(u64, reader); + const export_flags = try leb.readULEB128(u64, reader); + // TODO Parse special flags. + assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + const vmaddr_offset = try leb.readULEB128(u64, reader); + self.terminal_info = .{ + .export_flags = export_flags, + .vmaddr_offset = vmaddr_offset, + }; } const nedges = try reader.readByte(); self.base.node_count += nedges; @@ -162,13 +173,16 @@ pub const Node = struct { /// In case this is not upheld, this method will panic. fn writeULEB128Mem(self: Node, buffer: *std.ArrayList(u8)) !void { assert(self.trie_offset != null); // You need to call updateOffset first. - if (self.vmaddr_offset) |offset| { + if (self.terminal_info) |info| { // Terminal node info: encode export flags and vmaddr offset of this symbol. var info_buf_len: usize = 0; var info_buf: [@sizeOf(u64) * 2]u8 = undefined; var info_stream = std.io.fixedBufferStream(&info_buf); - try leb.writeULEB128(info_stream.writer(), self.export_flags.?); - try leb.writeULEB128(info_stream.writer(), offset); + // TODO Implement for special flags. + assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + try leb.writeULEB128(info_stream.writer(), info.export_flags); + try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset); // Encode the size of the terminal node info. var size_buf: [@sizeOf(u64)]u8 = undefined; @@ -208,9 +222,9 @@ pub const Node = struct { /// Updates offset of this node in the output byte stream. fn updateOffset(self: *Node, offset: usize) UpdateResult { var node_size: usize = 0; - if (self.vmaddr_offset) |vmaddr| { - node_size += sizeULEB128Mem(self.export_flags.?); - node_size += sizeULEB128Mem(vmaddr); + if (self.terminal_info) |info| { + node_size += sizeULEB128Mem(info.export_flags); + node_size += sizeULEB128Mem(info.vmaddr_offset); node_size += sizeULEB128Mem(node_size); } else { node_size += 1; // 0x0 for non-terminal nodes @@ -263,8 +277,10 @@ pub fn put(self: *Trie, symbol: Symbol) !void { self.root = .{ .base = self }; } const node = try self.root.?.put(symbol.name); - node.vmaddr_offset = symbol.vmaddr_offset; - node.export_flags = symbol.export_flags; + node.terminal_info = .{ + .vmaddr_offset = symbol.vmaddr_offset, + .export_flags = symbol.export_flags, + }; } const FromByteStreamError = error{ -- cgit v1.2.3 From a28340405392b4a0a687e668406a067be1ae5e3c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 9 Dec 2020 11:01:51 +0100 Subject: macho: split writing Trie into finalize and const write --- src/link/MachO.zig | 6 +- src/link/MachO/Trie.zig | 432 +++++++++++++++++++++++++----------------------- 2 files changed, 234 insertions(+), 204 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 153f47c340..c265deabdf 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1810,8 +1810,12 @@ fn writeExportTrie(self: *MachO) !void { }); } - var buffer = try trie.writeULEB128Mem(); + try trie.finalize(); + var buffer = try self.base.allocator.alloc(u8, trie.size); defer self.base.allocator.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + const nwritten = try trie.write(stream.writer()); + assert(nwritten == trie.size); const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; const export_size = @intCast(u32, mem.alignForward(buffer.len, 
@sizeOf(u64))); diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index 3877c8b5a9..b5f2057ff1 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -51,7 +51,7 @@ pub const Edge = struct { label: []u8, fn deinit(self: *Edge, allocator: *Allocator) void { - self.to.deinit(); + self.to.deinit(allocator); allocator.destroy(self.to); allocator.free(self.label); self.from = undefined; @@ -62,6 +62,7 @@ pub const Edge = struct { pub const Node = struct { base: *Trie, + /// Terminal info associated with this node. /// If this node is not a terminal node, info is null. terminal_info: ?struct { @@ -70,82 +71,93 @@ pub const Node = struct { /// VM address offset wrt to the section this symbol is defined against. vmaddr_offset: u64, } = null, + /// Offset of this node in the trie output byte stream. trie_offset: ?usize = null, + /// List of all edges originating from this node. edges: std.ArrayListUnmanaged(Edge) = .{}, - fn deinit(self: *Node) void { + node_dirty: bool = true, + + fn deinit(self: *Node, allocator: *Allocator) void { for (self.edges.items) |*edge| { - edge.deinit(self.base.allocator); + edge.deinit(allocator); } - self.edges.deinit(self.base.allocator); + self.edges.deinit(allocator); } /// Inserts a new node starting from `self`. - fn put(self: *Node, label: []const u8) !*Node { + fn put(self: *Node, allocator: *Allocator, label: []const u8) !*Node { // Check for match with edges from this node. for (self.edges.items) |*edge| { const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; if (match == 0) continue; - if (match == edge.label.len) return edge.to.put(label[match..]); + if (match == edge.label.len) return edge.to.put(allocator, label[match..]); // Found a match, need to splice up nodes. // From: A -> B // To: A -> C -> B - const mid = try self.base.allocator.create(Node); + const mid = try allocator.create(Node); mid.* = .{ .base = self.base }; - var to_label = try self.base.allocator.dupe(u8, edge.label[match..]); - self.base.allocator.free(edge.label); + var to_label = try allocator.dupe(u8, edge.label[match..]); + allocator.free(edge.label); const to_node = edge.to; edge.to = mid; - edge.label = try self.base.allocator.dupe(u8, label[0..match]); + edge.label = try allocator.dupe(u8, label[0..match]); self.base.node_count += 1; - try mid.edges.append(self.base.allocator, .{ + try mid.edges.append(allocator, .{ .from = mid, .to = to_node, .label = to_label, }); - return if (match == label.len) to_node else mid.put(label[match..]); + return if (match == label.len) to_node else mid.put(allocator, label[match..]); } // Add a new node. - const node = try self.base.allocator.create(Node); + const node = try allocator.create(Node); node.* = .{ .base = self.base }; self.base.node_count += 1; - try self.edges.append(self.base.allocator, .{ + try self.edges.append(allocator, .{ .from = self, .to = node, - .label = try self.base.allocator.dupe(u8, label), + .label = try allocator.dupe(u8, label), }); return node; } - fn fromByteStream(self: *Node, stream: anytype) Trie.FromByteStreamError!void { - self.trie_offset = try stream.getPos(); - var reader = stream.reader(); + /// Recursively parses the node from the input byte stream. 
+ fn read(self: *Node, allocator: *Allocator, reader: anytype) Trie.ReadError!void { + self.node_dirty = true; + + self.trie_offset = try reader.context.getPos(); + const node_size = try leb.readULEB128(u64, reader); if (node_size > 0) { const export_flags = try leb.readULEB128(u64, reader); // TODO Parse special flags. assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + const vmaddr_offset = try leb.readULEB128(u64, reader); + self.terminal_info = .{ .export_flags = export_flags, .vmaddr_offset = vmaddr_offset, }; } + const nedges = try reader.readByte(); self.base.node_count += nedges; + var i: usize = 0; while (i < nedges) : (i += 1) { var label = blk: { - var label_buf = std.ArrayList(u8).init(self.base.allocator); + var label_buf = std.ArrayList(u8).init(allocator); while (true) { const next = try reader.readByte(); if (next == @as(u8, 0)) @@ -154,25 +166,32 @@ pub const Node = struct { } break :blk label_buf.toOwnedSlice(); }; + const seek_to = try leb.readULEB128(u64, reader); - const cur_pos = try stream.getPos(); - try stream.seekTo(seek_to); - var node = try self.base.allocator.create(Node); + const cur_pos = try reader.context.getPos(); + try reader.context.seekTo(seek_to); + + const node = try allocator.create(Node); node.* = .{ .base = self.base }; - try node.fromByteStream(stream); - try self.edges.append(self.base.allocator, .{ + + try node.read(allocator, reader); + try self.edges.append(allocator, .{ .from = self, .to = node, .label = label, }); - try stream.seekTo(cur_pos); + try reader.context.seekTo(cur_pos); } } - /// This method should only be called *after* updateOffset has been called! - /// In case this is not upheld, this method will panic. - fn writeULEB128Mem(self: Node, buffer: *std.ArrayList(u8)) !void { - assert(self.trie_offset != null); // You need to call updateOffset first. + /// Writes this node to a byte stream. + /// The children of this node *are* not written to the byte stream + /// recursively. To write all nodes to a byte stream in sequence, + /// iterate over `Trie.ordered_nodes` and call this method on each node. + /// This is one of the requirements of the MachO. + /// Panics if `finalize` was not called before calling this method. + fn write(self: Node, writer: anytype) !void { + assert(!self.node_dirty); if (self.terminal_info) |info| { // Terminal node info: encode export flags and vmaddr offset of this symbol. var info_buf_len: usize = 0; @@ -189,38 +208,35 @@ pub const Node = struct { var size_stream = std.io.fixedBufferStream(&size_buf); try leb.writeULEB128(size_stream.writer(), info_stream.pos); - // Now, write them to the output buffer. - buffer.appendSliceAssumeCapacity(size_buf[0..size_stream.pos]); - buffer.appendSliceAssumeCapacity(info_buf[0..info_stream.pos]); + // Now, write them to the output stream. + try writer.writeAll(size_buf[0..size_stream.pos]); + try writer.writeAll(info_buf[0..info_stream.pos]); } else { // Non-terminal node is delimited by 0 byte. - buffer.appendAssumeCapacity(0); + try writer.writeByte(0); } // Write number of edges (max legal number of edges is 256). - buffer.appendAssumeCapacity(@intCast(u8, self.edges.items.len)); + try writer.writeByte(@intCast(u8, self.edges.items.len)); for (self.edges.items) |edge| { - // Write edges labels. 
- buffer.appendSliceAssumeCapacity(edge.label); - buffer.appendAssumeCapacity(0); - - var buf: [@sizeOf(u64)]u8 = undefined; - var buf_stream = std.io.fixedBufferStream(&buf); - try leb.writeULEB128(buf_stream.writer(), edge.to.trie_offset.?); - buffer.appendSliceAssumeCapacity(buf[0..buf_stream.pos]); + // Write edge label and offset to next node in trie. + try writer.writeAll(edge.label); + try writer.writeByte(0); + try leb.writeULEB128(writer, edge.to.trie_offset.?); } } - const UpdateResult = struct { + const FinalizeResult = struct { /// Current size of this node in bytes. node_size: usize, + /// True if the trie offset of this node in the output byte stream /// would need updating; false otherwise. updated: bool, }; /// Updates offset of this node in the output byte stream. - fn updateOffset(self: *Node, offset: usize) UpdateResult { + fn finalize(self: *Node, offset_in_trie: usize) FinalizeResult { var node_size: usize = 0; if (self.terminal_info) |info| { node_size += sizeULEB128Mem(info.export_flags); @@ -237,8 +253,9 @@ pub const Node = struct { } const trie_offset = self.trie_offset orelse 0; - const updated = offset != trie_offset; - self.trie_offset = offset; + const updated = offset_in_trie != trie_offset; + self.trie_offset = offset_in_trie; + self.node_dirty = false; return .{ .node_size = node_size, .updated = updated }; } @@ -256,15 +273,30 @@ pub const Node = struct { } }; -/// Count of nodes in the trie. -/// The count is updated at every `put` call. -/// The trie always consists of at least a root node, hence -/// the count always starts at 1. -node_count: usize = 1, /// The root node of the trie. -root: ?Node = null, +root: ?*Node = null, + allocator: *Allocator, +/// If you want to access nodes ordered in DFS fashion, +/// you should call `finalize` first since the nodes +/// in this container are not guaranteed to not be stale +/// if more insertions took place after the last `finalize` +/// call. +ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}, + +/// The size of the trie in bytes. +/// This value may be outdated if there were additional +/// insertions performed after `finalize` was called. +/// Call `finalize` before accessing this value to ensure +/// it is up-to-date. +size: usize = 0, + +/// Number of nodes currently in the trie. +node_count: usize = 0, + +trie_dirty: bool = true, + pub fn init(allocator: *Allocator) Trie { return .{ .allocator = allocator }; } @@ -273,76 +305,90 @@ pub fn init(allocator: *Allocator) Trie { /// This operation may change the layout of the trie by splicing edges in /// certain circumstances. pub fn put(self: *Trie, symbol: Symbol) !void { - if (self.root == null) { - self.root = .{ .base = self }; - } - const node = try self.root.?.put(symbol.name); + try self.createRoot(); + const node = try self.root.?.put(self.allocator, symbol.name); node.terminal_info = .{ .vmaddr_offset = symbol.vmaddr_offset, .export_flags = symbol.export_flags, }; + self.trie_dirty = true; } -const FromByteStreamError = error{ - OutOfMemory, - EndOfStream, - Overflow, -}; +/// Finalizes this trie for writing to a byte stream. +/// This step performs multiple passes through the trie ensuring +/// there are no gaps after every `Node` is ULEB128 encoded. +/// Call this method before trying to `write` the trie to a byte stream. +pub fn finalize(self: *Trie) !void { + if (!self.trie_dirty) return; -/// Parse the trie from a byte stream. 
-pub fn fromByteStream(self: *Trie, stream: anytype) FromByteStreamError!void { - if (self.root == null) { - self.root = .{ .base = self }; - } - return self.root.?.fromByteStream(stream); -} + self.ordered_nodes.shrinkRetainingCapacity(0); + try self.ordered_nodes.ensureCapacity(self.allocator, self.node_count); -/// Write the trie to a buffer ULEB128 encoded. -/// Caller owns the memory and needs to free it. -pub fn writeULEB128Mem(self: *Trie) ![]u8 { - var ordered_nodes = try self.nodes(); - defer self.allocator.free(ordered_nodes); + comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) }); + var fifo = Fifo.init(); + try fifo.writeItem(self.root.?); + + while (fifo.readItem()) |next| { + for (next.edges.items) |*edge| { + try fifo.writeItem(edge.to); + } + self.ordered_nodes.appendAssumeCapacity(next); + } - var offset: usize = 0; var more: bool = true; while (more) { - offset = 0; + self.size = 0; more = false; - for (ordered_nodes) |node| { - const res = node.updateOffset(offset); - offset += res.node_size; + for (self.ordered_nodes.items) |node| { + const res = node.finalize(self.size); + self.size += res.node_size; if (res.updated) more = true; } } - var buffer = std.ArrayList(u8).init(self.allocator); - try buffer.ensureCapacity(offset); - for (ordered_nodes) |node| { - try node.writeULEB128Mem(&buffer); - } - return buffer.toOwnedSlice(); + self.trie_dirty = false; } -pub fn nodes(self: *Trie) ![]*Node { - var ordered_nodes = std.ArrayList(*Node).init(self.allocator); - try ordered_nodes.ensureCapacity(self.node_count); +const ReadError = error{ + OutOfMemory, + EndOfStream, + Overflow, +}; - comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) }); - var fifo = Fifo.init(); - try fifo.writeItem(&self.root.?); +/// Parse the trie from a byte stream. +pub fn read(self: *Trie, reader: anytype) ReadError!void { + try self.createRoot(); + return self.root.?.read(self.allocator, reader); +} - while (fifo.readItem()) |next| { - for (next.edges.items) |*edge| { - try fifo.writeItem(edge.to); - } - ordered_nodes.appendAssumeCapacity(next); +/// Write the trie to a byte stream. +/// Caller owns the memory and needs to free it. +/// Panics if the trie was not finalized using `finalize` +/// before calling this method. 
+pub fn write(self: Trie, writer: anytype) !usize { + assert(!self.trie_dirty); + var counting_writer = std.io.countingWriter(writer); + for (self.ordered_nodes.items) |node| { + try node.write(counting_writer.writer()); } - - return ordered_nodes.toOwnedSlice(); + return counting_writer.bytes_written; } pub fn deinit(self: *Trie) void { - self.root.?.deinit(); + if (self.root) |root| { + root.deinit(self.allocator); + self.allocator.destroy(root); + } + self.ordered_nodes.deinit(self.allocator); +} + +fn createRoot(self: *Trie) !void { + if (self.root == null) { + const root = try self.allocator.create(Node); + root.* = .{ .base = self }; + self.root = root; + self.node_count += 1; + } } test "Trie node count" { @@ -350,7 +396,8 @@ test "Trie node count" { var trie = Trie.init(gpa); defer trie.deinit(); - testing.expectEqual(trie.node_count, 1); + testing.expectEqual(trie.node_count, 0); + testing.expect(trie.root == null); try trie.put(.{ .name = "_main", @@ -439,7 +486,7 @@ test "Trie basic" { } } -test "Trie.writeULEB128Mem" { +test "write Trie to a byte stream" { var gpa = testing.allocator; var trie = Trie.init(gpa); defer trie.deinit(); @@ -455,112 +502,91 @@ test "Trie.writeULEB128Mem" { .export_flags = 0, }); - var buffer = try trie.writeULEB128Mem(); - defer gpa.free(buffer); + try trie.finalize(); + try trie.finalize(); // Finalizing mulitple times is a nop subsequently unless we add new nodes. const exp_buffer = [_]u8{ - 0x0, - 0x1, - 0x5f, - 0x0, - 0x5, - 0x0, - 0x2, - 0x5f, - 0x6d, - 0x68, - 0x5f, - 0x65, - 0x78, - 0x65, - 0x63, - 0x75, - 0x74, - 0x65, - 0x5f, - 0x68, - 0x65, - 0x61, - 0x64, - 0x65, - 0x72, - 0x0, - 0x21, - 0x6d, - 0x61, - 0x69, - 0x6e, - 0x0, - 0x25, - 0x2, - 0x0, - 0x0, - 0x0, - 0x3, - 0x0, - 0x80, - 0x20, - 0x0, + 0x0, 0x1, // node root + 0x5f, 0x0, 0x5, // edge '_' + 0x0, 0x2, // non-terminal node + 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' + 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' + 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' + 0x2, 0x0, 0x0, 0x0, // terminal node + 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node }; - testing.expect(buffer.len == exp_buffer.len); - testing.expect(mem.eql(u8, buffer, exp_buffer[0..])); + var buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + { + const nwritten = try trie.write(stream.writer()); + testing.expect(nwritten == trie.size); + testing.expect(mem.eql(u8, buffer, exp_buffer[0..])); + } + { + // Writing finalized trie again should yield the same result. 
+ try stream.seekTo(0); + const nwritten = try trie.write(stream.writer()); + testing.expect(nwritten == trie.size); + testing.expect(mem.eql(u8, buffer, exp_buffer[0..])); + } } -test "parse Trie from byte stream" { - var gpa = testing.allocator; - - const in_buffer = [_]u8{ - 0x0, - 0x1, - 0x5f, - 0x0, - 0x5, - 0x0, - 0x2, - 0x5f, - 0x6d, - 0x68, - 0x5f, - 0x65, - 0x78, - 0x65, - 0x63, - 0x75, - 0x74, - 0x65, - 0x5f, - 0x68, - 0x65, - 0x61, - 0x64, - 0x65, - 0x72, - 0x0, - 0x21, - 0x6d, - 0x61, - 0x69, - 0x6e, - 0x0, - 0x25, - 0x2, - 0x0, - 0x0, - 0x0, - 0x3, - 0x0, - 0x80, - 0x20, - 0x0, - }; - var stream = std.io.fixedBufferStream(in_buffer[0..]); - var trie = Trie.init(gpa); - defer trie.deinit(); - try trie.fromByteStream(&stream); - - var out_buffer = try trie.writeULEB128Mem(); - defer gpa.free(out_buffer); - - testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); -} +// test "parse Trie from byte stream" { +// var gpa = testing.allocator; + +// const in_buffer = [_]u8{ +// 0x0, +// 0x1, +// 0x5f, +// 0x0, +// 0x5, +// 0x0, +// 0x2, +// 0x5f, +// 0x6d, +// 0x68, +// 0x5f, +// 0x65, +// 0x78, +// 0x65, +// 0x63, +// 0x75, +// 0x74, +// 0x65, +// 0x5f, +// 0x68, +// 0x65, +// 0x61, +// 0x64, +// 0x65, +// 0x72, +// 0x0, +// 0x21, +// 0x6d, +// 0x61, +// 0x69, +// 0x6e, +// 0x0, +// 0x25, +// 0x2, +// 0x0, +// 0x0, +// 0x0, +// 0x3, +// 0x0, +// 0x80, +// 0x20, +// 0x0, +// }; +// var stream = std.io.fixedBufferStream(in_buffer[0..]); +// var trie = Trie.init(gpa); +// defer trie.deinit(); +// try trie.fromByteStream(&stream); + +// var out_buffer = try trie.writeULEB128Mem(); +// defer gpa.free(out_buffer); + +// testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); +// } -- cgit v1.2.3 From 601600dec981e41d43bb72113d9284cbb9e1d9ae Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 9 Dec 2020 11:05:52 +0100 Subject: macho: parsing Trie now takes a reader and returns bytes read --- src/link/MachO.zig | 2 +- src/link/MachO/Trie.zig | 164 +++++++++++++++++++++++------------------------- 2 files changed, 79 insertions(+), 87 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c265deabdf..120326da96 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1806,7 +1806,7 @@ fn writeExportTrie(self: *MachO) !void { try trie.put(.{ .name = name, .vmaddr_offset = symbol.n_value - text_segment.inner.vmaddr, - .export_flags = 0, // TODO workout creation of export flags + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, }); } diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index b5f2057ff1..0016ff329c 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -39,27 +39,6 @@ const testing = std.testing; const assert = std.debug.assert; const Allocator = mem.Allocator; -pub const Symbol = struct { - name: []const u8, - vmaddr_offset: u64, - export_flags: u64, -}; - -pub const Edge = struct { - from: *Node, - to: *Node, - label: []u8, - - fn deinit(self: *Edge, allocator: *Allocator) void { - self.to.deinit(allocator); - allocator.destroy(self.to); - allocator.free(self.label); - self.from = undefined; - self.to = undefined; - self.label = undefined; - } -}; - pub const Node = struct { base: *Trie, @@ -80,6 +59,22 @@ pub const Node = struct { node_dirty: bool = true, + /// Edge connecting to nodes in the trie. 
+ pub const Edge = struct { + from: *Node, + to: *Node, + label: []u8, + + fn deinit(self: *Edge, allocator: *Allocator) void { + self.to.deinit(allocator); + allocator.destroy(self.to); + allocator.free(self.label); + self.from = undefined; + self.to = undefined; + self.label = undefined; + } + }; + fn deinit(self: *Node, allocator: *Allocator) void { for (self.edges.items) |*edge| { edge.deinit(allocator); @@ -131,10 +126,12 @@ pub const Node = struct { } /// Recursively parses the node from the input byte stream. - fn read(self: *Node, allocator: *Allocator, reader: anytype) Trie.ReadError!void { + fn read(self: *Node, allocator: *Allocator, reader: anytype) Trie.ReadError!usize { self.node_dirty = true; + const trie_offset = try reader.context.getPos(); + self.trie_offset = trie_offset; - self.trie_offset = try reader.context.getPos(); + var nread: usize = 0; const node_size = try leb.readULEB128(u64, reader); if (node_size > 0) { @@ -154,9 +151,13 @@ pub const Node = struct { const nedges = try reader.readByte(); self.base.node_count += nedges; + nread += (try reader.context.getPos()) - trie_offset; + var i: usize = 0; while (i < nedges) : (i += 1) { - var label = blk: { + const edge_start_pos = try reader.context.getPos(); + + const label = blk: { var label_buf = std.ArrayList(u8).init(allocator); while (true) { const next = try reader.readByte(); @@ -168,20 +169,24 @@ pub const Node = struct { }; const seek_to = try leb.readULEB128(u64, reader); - const cur_pos = try reader.context.getPos(); + const return_pos = try reader.context.getPos(); + + nread += return_pos - edge_start_pos; try reader.context.seekTo(seek_to); const node = try allocator.create(Node); node.* = .{ .base = self.base }; - try node.read(allocator, reader); + nread += try node.read(allocator, reader); try self.edges.append(allocator, .{ .from = self, .to = node, .label = label, }); - try reader.context.seekTo(cur_pos); + try reader.context.seekTo(return_pos); } + + return nread; } /// Writes this node to a byte stream. @@ -301,10 +306,23 @@ pub fn init(allocator: *Allocator) Trie { return .{ .allocator = allocator }; } +/// Export symbol that is to be placed in the trie. +pub const ExportSymbol = struct { + /// Name of the symbol. + name: []const u8, + + /// Offset of this symbol's virtual memory address from the beginning + /// of the __TEXT segment. + vmaddr_offset: u64, + + /// Export flags of this exported symbol. + export_flags: u64, +}; + /// Insert a symbol into the trie, updating the prefixes in the process. /// This operation may change the layout of the trie by splicing edges in /// certain circumstances. -pub fn put(self: *Trie, symbol: Symbol) !void { +pub fn put(self: *Trie, symbol: ExportSymbol) !void { try self.createRoot(); const node = try self.root.?.put(self.allocator, symbol.name); node.terminal_info = .{ @@ -356,7 +374,7 @@ const ReadError = error{ }; /// Parse the trie from a byte stream. 
-pub fn read(self: *Trie, reader: anytype) ReadError!void { +pub fn read(self: *Trie, reader: anytype) ReadError!usize { try self.createRoot(); return self.root.?.read(self.allocator, reader); } @@ -533,60 +551,34 @@ test "write Trie to a byte stream" { } } -// test "parse Trie from byte stream" { -// var gpa = testing.allocator; - -// const in_buffer = [_]u8{ -// 0x0, -// 0x1, -// 0x5f, -// 0x0, -// 0x5, -// 0x0, -// 0x2, -// 0x5f, -// 0x6d, -// 0x68, -// 0x5f, -// 0x65, -// 0x78, -// 0x65, -// 0x63, -// 0x75, -// 0x74, -// 0x65, -// 0x5f, -// 0x68, -// 0x65, -// 0x61, -// 0x64, -// 0x65, -// 0x72, -// 0x0, -// 0x21, -// 0x6d, -// 0x61, -// 0x69, -// 0x6e, -// 0x0, -// 0x25, -// 0x2, -// 0x0, -// 0x0, -// 0x0, -// 0x3, -// 0x0, -// 0x80, -// 0x20, -// 0x0, -// }; -// var stream = std.io.fixedBufferStream(in_buffer[0..]); -// var trie = Trie.init(gpa); -// defer trie.deinit(); -// try trie.fromByteStream(&stream); - -// var out_buffer = try trie.writeULEB128Mem(); -// defer gpa.free(out_buffer); - -// testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); -// } +test "parse Trie from byte stream" { + var gpa = testing.allocator; + + const in_buffer = [_]u8{ + 0x0, 0x1, // node root + 0x5f, 0x0, 0x5, // edge '_' + 0x0, 0x2, // non-terminal node + 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' + 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' + 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' + 0x2, 0x0, 0x0, 0x0, // terminal node + 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node + }; + + var in_stream = std.io.fixedBufferStream(in_buffer[0..]); + var trie = Trie.init(gpa); + defer trie.deinit(); + const nread = try trie.read(in_stream.reader()); + + testing.expect(nread == in_buffer.len); + + try trie.finalize(); + + var out_buffer = try gpa.alloc(u8, trie.size); + defer gpa.free(out_buffer); + var out_stream = std.io.fixedBufferStream(out_buffer); + const nwritten = try trie.write(out_stream.writer()); + + testing.expect(nwritten == trie.size); + testing.expect(mem.eql(u8, in_buffer[0..], out_buffer)); +} -- cgit v1.2.3 From 184c0f3c4e140d3f0971bac184f0abce00d8d336 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 9 Dec 2020 11:38:11 +0100 Subject: stage2+macho: write code signature only when targeting aarch64 --- src/link/MachO.zig | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 120326da96..1466f75486 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -301,7 +301,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { const tracy = trace(@src()); defer tracy.end(); - switch (self.base.options.output_mode) { + const output_mode = self.base.options.output_mode; + const target = self.base.options.target; + + switch (output_mode) { .Exe => { if (self.entry_addr) |addr| { // Update LC_MAIN with entry offset. @@ -312,12 +315,15 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { try self.writeExportTrie(); try self.writeSymbolTable(); try self.writeStringTable(); - // Preallocate space for the code signature. - // We need to do this at this stage so that we have the load commands with proper values - // written out to the file. - // The most important here is to have the correct vm and filesize of the __LINKEDIT segment - // where the code signature goes into. 
- try self.writeCodeSignaturePadding(); + + if (target.cpu.arch == .aarch64) { + // Preallocate space for the code signature. + // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. + try self.writeCodeSignaturePadding(); + } }, .Obj => {}, .Lib => return error.TODOImplementWritingLibFiles, @@ -339,9 +345,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { assert(!self.cmd_table_dirty); - switch (self.base.options.output_mode) { - .Exe, .Lib => try self.writeCodeSignature(), // code signing always comes last - else => {}, + if (target.cpu.arch == .aarch64) { + switch (output_mode) { + .Exe, .Lib => try self.writeCodeSignature(), // code signing always comes last + else => {}, + } } } -- cgit v1.2.3 From 44e2f210bb3fee78d339bc5c75822e0311600f70 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 9 Dec 2020 11:43:04 +0100 Subject: lld+macho: clean up error message when padding insufficient --- src/link/MachO.zig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 1466f75486..6fd0561cb1 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -760,13 +760,15 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_section = text_segment.sections.items[self.text_section_index.?]; const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64); - const needed_size = @sizeOf(macho.linkedit_data_command); + const needed_size = @sizeOf(macho.linkedit_data_command) * alloc_num / alloc_den; + if (needed_size + after_last_cmd_offset > text_section.offset) { - std.log.err("Unable to extend padding between load commands and start of __text section.", .{}); - std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size * alloc_num / alloc_den}); - std.log.err("fall back to the system linker.", .{}); + std.log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); + std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); + std.log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); return error.NotEnoughPadding; } + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; // TODO This is clunky. self.linkedit_segment_next_offset = @intCast(u32, mem.alignForwardGeneric(u64, linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize, @sizeOf(u64))); -- cgit v1.2.3
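For reference, a minimal sketch of how the reworked Trie API from this series fits together end to end: build the trie with put, call finalize to assign node offsets and compute the encoded size, write the ULEB128-encoded bytes through any writer, and optionally read them back. The import path, allocator setup, and symbol values below are illustrative only, not taken from the patches.

const std = @import("std");
const mem = std.mem;
const macho = std.macho;
const testing = std.testing;
const Trie = @import("Trie.zig"); // illustrative path; the real file lives at src/link/MachO/Trie.zig

test "export trie round trip (sketch)" {
    var gpa = testing.allocator;

    var trie = Trie.init(gpa);
    defer trie.deinit();

    // Symbol names and vmaddr offsets are made up for illustration.
    try trie.put(.{
        .name = "__mh_execute_header",
        .vmaddr_offset = 0,
        .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
    });
    try trie.put(.{
        .name = "_main",
        .vmaddr_offset = 0x1000,
        .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR,
    });

    // finalize() orders the nodes, fixes their trie offsets, and computes trie.size;
    // write() then emits the ULEB128-encoded trie and reports the bytes written.
    try trie.finalize();
    var buffer = try gpa.alloc(u8, trie.size);
    defer gpa.free(buffer);
    var stream = std.io.fixedBufferStream(buffer);
    const nwritten = try trie.write(stream.writer());
    testing.expect(nwritten == trie.size);

    // Round trip: parse the trie back from the bytes just written.
    var in_stream = std.io.fixedBufferStream(buffer);
    var parsed = Trie.init(gpa);
    defer parsed.deinit();
    const nread = try parsed.read(in_stream.reader());
    testing.expect(nread == trie.size);
}

This mirrors the sequence writeExportTrie in MachO.zig follows after the final commit: finalize the trie, allocate trie.size bytes, and write through a fixedBufferStream before copying the buffer into the __LINKEDIT segment.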