diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2024-01-28 00:07:01 +0100 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2024-01-28 00:40:50 +0100 |
| commit | 6337ce16ae76977b2444b5c07f3c436db4d5ece7 (patch) | |
| tree | f462496a8adce12d5535ab9a0490a2b45ecf1bbb | |
| parent | 190ea02e0d0c939c0b558927b63a03e30af4749a (diff) | |
| download | zig-6337ce16ae76977b2444b5c07f3c436db4d5ece7.tar.gz zig-6337ce16ae76977b2444b5c07f3c436db4d5ece7.zip | |
macho: do not allocate input files in full
| -rw-r--r-- | src/link/MachO.zig | 62 | ||||
| -rw-r--r-- | src/link/MachO/Archive.zig | 44 | ||||
| -rw-r--r-- | src/link/MachO/Atom.zig | 14 | ||||
| -rw-r--r-- | src/link/MachO/DwarfInfo.zig | 59 | ||||
| -rw-r--r-- | src/link/MachO/Dylib.zig | 99 | ||||
| -rw-r--r-- | src/link/MachO/InternalObject.zig | 50 | ||||
| -rw-r--r-- | src/link/MachO/Object.zig | 296 | ||||
| -rw-r--r-- | src/link/MachO/Symbol.zig | 12 | ||||
| -rw-r--r-- | src/link/MachO/ZigObject.zig | 50 | ||||
| -rw-r--r-- | src/link/MachO/relocatable.zig | 3 |
10 files changed, 394 insertions, 295 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9ace2e3b82..0fb38928c6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -610,7 +610,10 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node if (mem.indexOf(u8, sect.segName(), "ZIG") == null) continue; // Non-Zig sections are handled separately // TODO: we will resolve and write ZigObject's TLS data twice: // once here, and once in writeAtoms - const code = zo.getAtomDataAlloc(self, gpa, atom.*) catch |err| switch (err) { + const atom_size = math.cast(usize, atom.size) orelse return error.Overflow; + const code = try gpa.alloc(u8, atom_size); + defer gpa.free(code); + zo.getAtomData(self, atom.*, code) catch |err| switch (err) { error.InputOutput => { try self.reportUnexpectedError("fetching code for '{s}' failed", .{ atom.getName(self), @@ -625,7 +628,6 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node return error.FlushFailure; }, }; - defer gpa.free(code); const file_offset = sect.offset + atom.value - sect.addr; atom.resolveRelocs(self, code) catch |err| switch (err) { error.ResolveFailed => has_resolve_error = true, @@ -974,17 +976,15 @@ fn parseObject(self: *MachO, path: []const u8) ParseError!void { const gpa = self.base.comp.gpa; const file = try std.fs.cwd().openFile(path, .{}); - defer file.close(); const mtime: u64 = mtime: { const stat = file.stat() catch break :mtime 0; break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); }; - const data = try file.readToEndAlloc(gpa, std.math.maxInt(u32)); const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .object = .{ .path = try gpa.dupe(u8, path), + .file = file, .mtime = mtime, - .data = data, .index = index, } }); try self.objects.append(gpa, index); @@ -1013,17 +1013,9 @@ fn parseArchive(self: *MachO, lib: SystemLib, must_link: bool, fat_arch: ?fat.Ar const file = try std.fs.cwd().openFile(lib.path, .{}); defer file.close(); - const data = if (fat_arch) |arch| blk: { - try file.seekTo(arch.offset); - const data = try gpa.alloc(u8, arch.size); - const nread = try file.readAll(data); - if (nread != arch.size) return error.InputOutput; - break :blk data; - } else try file.readToEndAlloc(gpa, std.math.maxInt(u32)); - - var archive = Archive{ .path = try gpa.dupe(u8, lib.path), .data = data }; + var archive = Archive{}; defer archive.deinit(gpa); - try archive.parse(self); + try archive.parse(self, lib.path, file, fat_arch); var has_parse_error = false; for (archive.objects.items) |extracted| { @@ -1058,18 +1050,9 @@ fn parseDylib(self: *MachO, lib: SystemLib, explicit: bool, fat_arch: ?fat.Arch) const file = try std.fs.cwd().openFile(lib.path, .{}); defer file.close(); - const data = if (fat_arch) |arch| blk: { - try file.seekTo(arch.offset); - const data = try gpa.alloc(u8, arch.size); - const nread = try file.readAll(data); - if (nread != arch.size) return error.InputOutput; - break :blk data; - } else try file.readToEndAlloc(gpa, std.math.maxInt(u32)); - const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .dylib = .{ .path = try gpa.dupe(u8, lib.path), - .data = data, .index = index, .needed = lib.needed, .weak = lib.weak, @@ -1077,7 +1060,7 @@ fn parseDylib(self: *MachO, lib: SystemLib, explicit: bool, fat_arch: ?fat.Arch) .explicit = explicit, } }); const dylib = &self.files.items(.data)[index].dylib; - try dylib.parse(self); + try dylib.parse(self, file, fat_arch); try self.dylibs.append(gpa, index); @@ -1098,7 +1081,6 @@ fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!File.Index const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .dylib = .{ .path = try gpa.dupe(u8, lib.path), - .data = &[0]u8{}, .index = index, .needed = lib.needed, .weak = lib.weak, @@ -1404,6 +1386,8 @@ pub fn resolveSymbols(self: *MachO) !void { const index = self.objects.items[i]; if (!self.getFile(index).?.object.alive) { _ = self.objects.orderedRemove(i); + self.files.items(.data)[index].object.deinit(self.base.comp.gpa); + self.files.set(index, .null); } else i += 1; } @@ -1511,18 +1495,13 @@ fn createObjcSections(self: *MachO) !void { } for (objc_msgsend_syms.keys()) |sym_index| { + const internal = self.getInternalObject().?; const sym = self.getSymbol(sym_index); - sym.value = 0; - sym.atom = 0; - sym.nlist_idx = 0; - sym.file = self.internal_object.?; - sym.flags = .{}; + _ = try internal.addSymbol(sym.getName(self), self); sym.visibility = .hidden; - const object = self.getInternalObject().?; const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?; - const selrefs_index = try object.addObjcMsgsendSections(name, self); + const selrefs_index = try internal.addObjcMsgsendSections(name, self); try sym.addExtra(.{ .objc_selrefs = selrefs_index }, self); - try object.symbols.append(gpa, sym_index); } } @@ -1659,6 +1638,8 @@ fn deadStripDylibs(self: *MachO) void { const index = self.dylibs.items[i]; if (!self.getFile(index).?.dylib.isAlive(self)) { _ = self.dylibs.orderedRemove(i); + self.files.items(.data)[index].dylib.deinit(self.base.comp.gpa); + self.files.set(index, .null); } else i += 1; } } @@ -2609,13 +2590,13 @@ fn writeAtoms(self: *MachO) !void { const atom = self.getAtom(atom_index).?; assert(atom.flags.alive); const off = math.cast(usize, atom.value - header.addr) orelse return error.Overflow; - const data = switch (atom.getFile(self)) { - .object => |x| try x.getAtomData(atom.*), - .zig_object => |x| try x.getAtomDataAlloc(self, arena.allocator(), atom.*), - else => unreachable, - }; const atom_size = math.cast(usize, atom.size) orelse return error.Overflow; - @memcpy(buffer[off..][0..atom_size], data); + switch (atom.getFile(self)) { + .internal => |x| try x.getAtomData(atom.*, buffer[off..][0..atom_size]), + .object => |x| try x.getAtomData(atom.*, buffer[off..][0..atom_size]), + .zig_object => |x| try x.getAtomData(self, atom.*, buffer[off..][0..atom_size]), + else => unreachable, + } atom.resolveRelocs(self, buffer[off..][0..atom_size]) catch |err| switch (err) { error.ResolveFailed => has_resolve_error = true, else => |e| return e, @@ -3734,6 +3715,7 @@ pub fn getOrCreateGlobal(self: *MachO, off: u32) !GetOrCreateGlobalResult { const index = try self.addSymbol(); const global = self.getSymbol(index); global.name = off; + global.flags.global = true; gop.value_ptr.* = index; } return .{ diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 7203d89b94..29cc64e719 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -1,6 +1,3 @@ -path: []const u8, -data: []const u8, - objects: std.ArrayListUnmanaged(Object) = .{}, // Archive files start with the ARMAG identifying string. Then follows a @@ -73,62 +70,73 @@ pub fn isArchive(path: []const u8, fat_arch: ?fat.Arch) !bool { } pub fn deinit(self: *Archive, allocator: Allocator) void { - allocator.free(self.data); - allocator.free(self.path); self.objects.deinit(allocator); } -pub fn parse(self: *Archive, macho_file: *MachO) !void { +pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, file: std.fs.File, fat_arch: ?fat.Arch) !void { const gpa = macho_file.base.comp.gpa; var arena = std.heap.ArenaAllocator.init(gpa); defer arena.deinit(); - var stream = std.io.fixedBufferStream(self.data); - const reader = stream.reader(); - _ = try reader.readBytesNoEof(SARMAG); + const offset = if (fat_arch) |ar| ar.offset else 0; + const size = if (fat_arch) |ar| ar.size else (try file.stat()).size; + try file.seekTo(offset); + + const reader = file.reader(); + _ = try reader.readBytesNoEof(Archive.SARMAG); + var pos: usize = Archive.SARMAG; while (true) { - if (stream.pos >= self.data.len) break; - if (!mem.isAligned(stream.pos, 2)) stream.pos += 1; + if (pos >= size) break; + if (!mem.isAligned(pos, 2)) { + try file.seekBy(1); + pos += 1; + } const hdr = try reader.readStruct(ar_hdr); + pos += @sizeOf(ar_hdr); if (!mem.eql(u8, &hdr.ar_fmag, ARFMAG)) { - try macho_file.reportParseError(self.path, "invalid header delimiter: expected '{s}', found '{s}'", .{ + try macho_file.reportParseError(path, "invalid header delimiter: expected '{s}', found '{s}'", .{ std.fmt.fmtSliceEscapeLower(ARFMAG), std.fmt.fmtSliceEscapeLower(&hdr.ar_fmag), }); return error.MalformedArchive; } - var size = try hdr.size(); + var hdr_size = try hdr.size(); const name = name: { if (hdr.name()) |n| break :name n; if (try hdr.nameLength()) |len| { - size -= len; + hdr_size -= len; const buf = try arena.allocator().alloc(u8, len); try reader.readNoEof(buf); + pos += len; const actual_len = mem.indexOfScalar(u8, buf, @as(u8, 0)) orelse len; break :name buf[0..actual_len]; } unreachable; }; defer { - _ = stream.seekBy(size) catch {}; + _ = file.seekBy(hdr_size) catch {}; + pos += hdr_size; } if (mem.eql(u8, name, "__.SYMDEF") or mem.eql(u8, name, "__.SYMDEF SORTED")) continue; const object = Object{ - .archive = try gpa.dupe(u8, self.path), + .archive = .{ + .path = try gpa.dupe(u8, path), + .offset = offset + pos, + }, .path = try gpa.dupe(u8, name), - .data = try gpa.dupe(u8, self.data[stream.pos..][0..size]), + .file = try std.fs.cwd().openFile(path, .{}), .index = undefined, .alive = false, .mtime = hdr.date() catch 0, }; - log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, self.path }); + log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, path }); try self.objects.append(gpa, object); } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 57fb67f505..cbd98490fa 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -43,7 +43,11 @@ prev_index: Index = 0, next_index: Index = 0, pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 { - return macho_file.strings.getAssumeExists(self.name); + return switch (self.getFile(macho_file)) { + .dylib => unreachable, + .zig_object => |x| x.strtab.getAssumeExists(self.name), + inline else => |x| x.getString(self.name), + }; } pub fn getFile(self: Atom, macho_file: *MachO) File { @@ -52,17 +56,17 @@ pub fn getFile(self: Atom, macho_file: *MachO) File { pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { return switch (self.getFile(macho_file)) { - .zig_object => |x| x.getAtomRelocs(self), - .object => |x| x.getAtomRelocs(self), - else => unreachable, + .dylib => unreachable, + inline else => |x| x.getAtomRelocs(self), }; } pub fn getInputSection(self: Atom, macho_file: *MachO) macho.section_64 { return switch (self.getFile(macho_file)) { + .dylib => unreachable, .zig_object => |x| x.getInputSection(self, macho_file), .object => |x| x.sections.items(.header)[self.n_sect], - else => unreachable, + .internal => |x| x.sections.items(.header)[self.n_sect], }; } diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig index 036738225d..9974386bb7 100644 --- a/src/link/MachO/DwarfInfo.zig +++ b/src/link/MachO/DwarfInfo.zig @@ -1,15 +1,17 @@ -debug_info: []const u8, -debug_abbrev: []const u8, -debug_str: []const u8, - /// Abbreviation table indexed by offset in the .debug_abbrev bytestream abbrev_tables: std.AutoArrayHashMapUnmanaged(u64, AbbrevTable) = .{}, /// List of compile units as they appear in the .debug_info bytestream compile_units: std.ArrayListUnmanaged(CompileUnit) = .{}, - -pub fn init(dw: *DwarfInfo, allocator: Allocator) !void { - try dw.parseAbbrevTables(allocator); - try dw.parseCompileUnits(allocator); +/// Debug info string table +strtab: std.ArrayListUnmanaged(u8) = .{}, +/// Debug info data +di_data: std.ArrayListUnmanaged(u8) = .{}, + +pub fn init(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void { + try dw.strtab.ensureTotalCapacityPrecise(allocator, di.debug_str.len); + dw.strtab.appendSliceAssumeCapacity(di.debug_str); + try dw.parseAbbrevTables(allocator, di); + try dw.parseCompileUnits(allocator, di); } pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void { @@ -18,18 +20,27 @@ pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void { cu.deinit(allocator); } dw.compile_units.deinit(allocator); + dw.strtab.deinit(allocator); + dw.di_data.deinit(allocator); +} + +fn appendDiData(dw: *DwarfInfo, allocator: Allocator, values: []const u8) error{OutOfMemory}!u32 { + const index: u32 = @intCast(dw.di_data.items.len); + try dw.di_data.ensureUnusedCapacity(allocator, values.len); + dw.di_data.appendSliceAssumeCapacity(values); + return index; } fn getString(dw: DwarfInfo, off: usize) [:0]const u8 { - assert(off < dw.debug_str.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.debug_str.ptr + off)), 0); + assert(off < dw.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.strtab.items.ptr + off)), 0); } -fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator) !void { +fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void { const tracy = trace(@src()); defer tracy.end(); - const debug_abbrev = dw.debug_abbrev; + const debug_abbrev = di.debug_abbrev; var stream = std.io.fixedBufferStream(debug_abbrev); var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); @@ -77,11 +88,11 @@ fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator) !void { } } -fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator) !void { +fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void { const tracy = trace(@src()); defer tracy.end(); - const debug_info = dw.debug_info; + const debug_info = di.debug_info; var stream = std.io.fixedBufferStream(debug_info); var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); @@ -107,7 +118,7 @@ fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator) !void { cu.header.address_size = try reader.readInt(u8, .little); const table = dw.abbrev_tables.get(cu.header.debug_abbrev_offset).?; - try dw.parseDie(allocator, cu, table, null, &creader); + try dw.parseDie(allocator, cu, table, di, null, &creader); } } @@ -116,6 +127,7 @@ fn parseDie( allocator: Allocator, cu: *CompileUnit, table: AbbrevTable, + di: DebugInfo, parent: ?u32, creader: anytype, ) anyerror!void { @@ -140,19 +152,20 @@ fn parseDie( } const decl = table.decls.get(code) orelse return error.MalformedDwarf; // TODO better errors - const data = dw.debug_info; + const data = di.debug_info; try cu.diePtr(die).values.ensureTotalCapacityPrecise(allocator, decl.attrs.values().len); for (decl.attrs.values()) |attr| { const start = std.math.cast(usize, creader.bytes_read) orelse return error.Overflow; try advanceByFormSize(cu, attr.form, creader); const end = std.math.cast(usize, creader.bytes_read) orelse return error.Overflow; - cu.diePtr(die).values.appendAssumeCapacity(data[start..end]); + const index = try dw.appendDiData(allocator, data[start..end]); + cu.diePtr(die).values.appendAssumeCapacity(.{ .index = index, .len = @intCast(end - start) }); } if (decl.children) { // Open scope - try dw.parseDie(allocator, cu, table, die, creader); + try dw.parseDie(allocator, cu, table, di, die, creader); } } } @@ -340,7 +353,7 @@ pub const CompileUnit = struct { pub const Die = struct { code: Code, - values: std.ArrayListUnmanaged([]const u8) = .{}, + values: std.ArrayListUnmanaged(struct { index: u32, len: u32 }) = .{}, children: std.ArrayListUnmanaged(Die.Index) = .{}, pub fn deinit(die: *Die, gpa: Allocator) void { @@ -354,7 +367,7 @@ pub const Die = struct { const index = decl.attrs.getIndex(at) orelse return null; const attr = decl.attrs.values()[index]; const value = die.values.items[index]; - return .{ .attr = attr, .bytes = value }; + return .{ .attr = attr, .bytes = ctx.di_data.items[value.index..][0..value.len] }; } pub const Index = u32; @@ -458,6 +471,12 @@ pub const Format = enum { dwarf64, }; +const DebugInfo = struct { + debug_info: []const u8, + debug_abbrev: []const u8, + debug_str: []const u8, +}; + const assert = std.debug.assert; const dwarf = std.dwarf; const leb = std.leb; diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 363ec2e3f9..abad323213 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -1,8 +1,6 @@ path: []const u8, -data: []const u8, index: File.Index, -header: ?macho.mach_header_64 = null, exports: std.MultiArrayList(Export) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, id: ?Id = null, @@ -34,7 +32,6 @@ pub fn isDylib(path: []const u8, fat_arch: ?fat.Arch) !bool { } pub fn deinit(self: *Dylib, allocator: Allocator) void { - allocator.free(self.data); allocator.free(self.path); self.exports.deinit(allocator); self.strtab.deinit(allocator); @@ -44,22 +41,29 @@ pub fn deinit(self: *Dylib, allocator: Allocator) void { id.deinit(allocator); } self.dependents.deinit(allocator); + for (self.rpaths.keys()) |rpath| { + allocator.free(rpath); + } self.rpaths.deinit(allocator); } -pub fn parse(self: *Dylib, macho_file: *MachO) !void { +pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat.Arch) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = macho_file.base.comp.gpa; - var stream = std.io.fixedBufferStream(self.data); - const reader = stream.reader(); + const offset = if (fat_arch) |ar| ar.offset else 0; log.debug("parsing dylib from binary", .{}); - self.header = try reader.readStruct(macho.mach_header_64); + var header_buffer: [@sizeOf(macho.mach_header_64)]u8 = undefined; + { + const amt = try file.preadAll(&header_buffer, offset); + if (amt != @sizeOf(macho.mach_header_64)) return error.InputOutput; + } + const header = @as(*align(1) const macho.mach_header_64, @ptrCast(&header_buffer)).*; - const this_cpu_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) { + const this_cpu_arch: std.Target.Cpu.Arch = switch (header.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, else => |x| { @@ -72,39 +76,60 @@ pub fn parse(self: *Dylib, macho_file: *MachO) !void { return error.InvalidCpuArch; } - const lc_id = self.getLoadCommand(.ID_DYLIB) orelse { - try macho_file.reportParseError2(self.index, "missing LC_ID_DYLIB load command", .{}); - return error.MalformedDylib; - }; - self.id = try Id.fromLoadCommand(gpa, lc_id.cast(macho.dylib_command).?, lc_id.getDylibPathName()); + const lc_buffer = try gpa.alloc(u8, header.sizeofcmds); + defer gpa.free(lc_buffer); + { + const amt = try file.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64)); + if (amt != lc_buffer.len) return error.InputOutput; + } var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + .ncmds = header.ncmds, + .buffer = lc_buffer, }; while (it.next()) |cmd| switch (cmd.cmd()) { - .REEXPORT_DYLIB => if (self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) { + .ID_DYLIB => { + self.id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName()); + }, + .REEXPORT_DYLIB => if (header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) { const id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName()); try self.dependents.append(gpa, id); }, .DYLD_INFO_ONLY => { const dyld_cmd = cmd.cast(macho.dyld_info_command).?; - const data = self.data[dyld_cmd.export_off..][0..dyld_cmd.export_size]; + const data = try gpa.alloc(u8, dyld_cmd.export_size); + defer gpa.free(data); + const amt = try file.preadAll(data, dyld_cmd.export_off + offset); + if (amt != data.len) return error.InputOutput; try self.parseTrie(data, macho_file); }, .DYLD_EXPORTS_TRIE => { const ld_cmd = cmd.cast(macho.linkedit_data_command).?; - const data = self.data[ld_cmd.dataoff..][0..ld_cmd.datasize]; + const data = try gpa.alloc(u8, ld_cmd.datasize); + defer gpa.free(data); + const amt = try file.preadAll(data, ld_cmd.dataoff + offset); + if (amt != data.len) return error.InputOutput; try self.parseTrie(data, macho_file); }, .RPATH => { const path = cmd.getRpathPathName(); - try self.rpaths.put(gpa, path, {}); + try self.rpaths.put(gpa, try gpa.dupe(u8, path), {}); + }, + .BUILD_VERSION, + .VERSION_MIN_MACOSX, + .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => { + self.platform = MachO.Platform.fromLoadCommand(cmd); }, else => {}, }; - self.initPlatform(); + if (self.id == null) { + try macho_file.reportParseError2(self.index, "missing LC_ID_DYLIB load command", .{}); + return error.MalformedDylib; + } if (self.platform) |platform| { if (!macho_file.platform.eqlTarget(platform)) { @@ -168,7 +193,7 @@ const TrieIterator = struct { pub fn addExport(self: *Dylib, allocator: Allocator, name: []const u8, flags: Export.Flags) !void { try self.exports.append(allocator, .{ - .name = try self.insertString(allocator, name), + .name = try self.addString(allocator, name), .flags = flags, }); } @@ -479,24 +504,6 @@ pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void { } } -fn initPlatform(self: *Dylib) void { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - self.platform = while (it.next()) |cmd| { - switch (cmd.cmd()) { - .BUILD_VERSION, - .VERSION_MIN_MACOSX, - .VERSION_MIN_IPHONEOS, - .VERSION_MIN_TVOS, - .VERSION_MIN_WATCHOS, - => break MachO.Platform.fromLoadCommand(cmd), - else => {}, - } - } else null; -} - pub fn resolveSymbols(self: *Dylib, macho_file: *MachO) void { const tracy = trace(@src()); defer tracy.end(); @@ -526,8 +533,10 @@ pub fn resetGlobals(self: *Dylib, macho_file: *MachO) void { for (self.symbols.items) |sym_index| { const sym = macho_file.getSymbol(sym_index); const name = sym.name; + const global = sym.flags.global; sym.* = .{}; sym.name = name; + sym.flags.global = global; } } @@ -589,17 +598,7 @@ pub inline fn getUmbrella(self: Dylib, macho_file: *MachO) *Dylib { return macho_file.getFile(self.umbrella).?.dylib; } -fn getLoadCommand(self: Dylib, lc: macho.LC) ?LoadCommandIterator.LoadCommand { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - while (it.next()) |cmd| { - if (cmd.cmd() == lc) return cmd; - } else return null; -} - -fn insertString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 { +fn addString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 { const off = @as(u32, @intCast(self.strtab.items.len)); try self.strtab.writer(allocator).print("{s}\x00", .{name}); return off; diff --git a/src/link/MachO/InternalObject.zig b/src/link/MachO/InternalObject.zig index 88663c2e37..43a02c5969 100644 --- a/src/link/MachO/InternalObject.zig +++ b/src/link/MachO/InternalObject.zig @@ -3,6 +3,7 @@ index: File.Index, sections: std.MultiArrayList(Section) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, objc_methnames: std.ArrayListUnmanaged(u8) = .{}, objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64), @@ -16,6 +17,7 @@ pub fn deinit(self: *InternalObject, allocator: Allocator) void { self.sections.deinit(allocator); self.atoms.deinit(allocator); self.symbols.deinit(allocator); + self.strtab.deinit(allocator); self.objc_methnames.deinit(allocator); } @@ -26,7 +28,11 @@ pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) const gop = try macho_file.getOrCreateGlobal(off); self.symbols.addOneAssumeCapacity().* = gop.index; const sym = macho_file.getSymbol(gop.index); - sym.* = .{ .name = off, .file = self.index }; + sym.file = self.index; + sym.value = 0; + sym.atom = 0; + sym.nlist_idx = 0; + sym.flags = .{ .global = true }; return gop.index; } @@ -45,7 +51,7 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try macho_file.strings.insert(gpa, name); + atom.name = try self.addString(gpa, name); atom.file = self.index; atom.size = methname.len + 1; atom.alignment = .@"1"; @@ -79,7 +85,7 @@ fn addObjcSelrefsSection( defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try macho_file.strings.insert(gpa, name); + atom.name = try self.addString(gpa, name); atom.file = self.index; atom.size = @sizeOf(u64); atom.alignment = .@"8"; @@ -158,16 +164,36 @@ fn addSection(self: *InternalObject, allocator: Allocator, segname: []const u8, return n_sect; } -pub fn getSectionData(self: *const InternalObject, index: u32) []const u8 { +pub fn getAtomData(self: *const InternalObject, atom: Atom, buffer: []u8) !void { + assert(buffer.len == atom.size); const slice = self.sections.slice(); - assert(index < slice.items(.header).len); - const sect = slice.items(.header)[index]; - const extra = slice.items(.extra)[index]; - if (extra.is_objc_methname) { - return self.objc_methnames.items[sect.offset..][0..sect.size]; - } else if (extra.is_objc_selref) { - return &self.objc_selrefs; - } else @panic("ref to non-existent section"); + const sect = slice.items(.header)[atom.n_sect]; + const extra = slice.items(.extra)[atom.n_sect]; + const data = if (extra.is_objc_methname) + self.objc_methnames.items[sect.offset..][0..sect.size] + else if (extra.is_objc_selref) + &self.objc_selrefs + else + @panic("ref to non-existent section"); + @memcpy(buffer, data[atom.off..][0..atom.size]); +} + +pub fn getAtomRelocs(self: *const InternalObject, atom: Atom) []const Relocation { + const relocs = self.sections.items(.relocs)[atom.n_sect]; + return relocs.items[atom.relocs.pos..][0..atom.relocs.len]; +} + +fn addString(self: *InternalObject, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 { + const off: u32 = @intCast(self.strtab.items.len); + try self.strtab.ensureUnusedCapacity(allocator, name.len + 1); + self.strtab.appendSliceAssumeCapacity(name); + self.strtab.appendAssumeCapacity(0); + return off; +} + +pub fn getString(self: InternalObject, off: u32) [:0]const u8 { + assert(off < self.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); } pub fn asFile(self: *InternalObject) File { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 9aecf0a78e..c6e8578dc5 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1,13 +1,13 @@ -archive: ?[]const u8 = null, +archive: ?Archive = null, path: []const u8, +file: std.fs.File, mtime: u64, -data: []const u8, index: File.Index, header: ?macho.mach_header_64 = null, sections: std.MultiArrayList(Section) = .{}, symtab: std.MultiArrayList(Nlist) = .{}, -strtab: []const u8 = &[0]u8{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, @@ -22,6 +22,7 @@ cies: std.ArrayListUnmanaged(Cie) = .{}, fdes: std.ArrayListUnmanaged(Fde) = .{}, eh_frame_data: std.ArrayListUnmanaged(u8) = .{}, unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record.Index) = .{}, +data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, alive: bool = true, hidden: bool = false, @@ -29,6 +30,11 @@ hidden: bool = false, dynamic_relocs: MachO.DynamicRelocs = .{}, output_symtab_ctx: MachO.SymtabCtx = .{}, +const Archive = struct { + path: []const u8, + offset: u64, +}; + pub fn isObject(path: []const u8) !bool { const file = try std.fs.cwd().openFile(path, .{}); defer file.close(); @@ -37,12 +43,16 @@ pub fn isObject(path: []const u8) !bool { } pub fn deinit(self: *Object, allocator: Allocator) void { + self.file.close(); + if (self.archive) |*ar| allocator.free(ar.path); + allocator.free(self.path); for (self.sections.items(.relocs), self.sections.items(.subsections)) |*relocs, *sub| { relocs.deinit(allocator); sub.deinit(allocator); } self.sections.deinit(allocator); self.symtab.deinit(allocator); + self.strtab.deinit(allocator); self.symbols.deinit(allocator); self.atoms.deinit(allocator); self.cies.deinit(allocator); @@ -54,7 +64,7 @@ pub fn deinit(self: *Object, allocator: Allocator) void { sf.stabs.deinit(allocator); } self.stab_files.deinit(allocator); - allocator.free(self.data); + self.data_in_code.deinit(allocator); } pub fn parse(self: *Object, macho_file: *MachO) !void { @@ -62,10 +72,14 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { defer tracy.end(); const gpa = macho_file.base.comp.gpa; - var stream = std.io.fixedBufferStream(self.data); - const reader = stream.reader(); + const offset = if (self.archive) |ar| ar.offset else 0; - self.header = try reader.readStruct(macho.mach_header_64); + var header_buffer: [@sizeOf(macho.mach_header_64)]u8 = undefined; + { + const amt = try self.file.preadAll(&header_buffer, offset); + if (amt != @sizeOf(macho.mach_header_64)) return error.InputOutput; + } + self.header = @as(*align(1) const macho.mach_header_64, @ptrCast(&header_buffer)).*; const this_cpu_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, @@ -80,35 +94,79 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { return error.InvalidCpuArch; } - if (self.getLoadCommand(.SEGMENT_64)) |lc| { - const sections = lc.getSections(); - try self.sections.ensureUnusedCapacity(gpa, sections.len); - for (sections) |sect| { - const index = try self.sections.addOne(gpa); - self.sections.set(index, .{ .header = sect }); - - if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - self.eh_frame_sect_index = @intCast(index); - } else if (mem.eql(u8, sect.sectName(), "__compact_unwind")) { - self.compact_unwind_sect_index = @intCast(index); - } - } - } - if (self.getLoadCommand(.SYMTAB)) |lc| { - const cmd = lc.cast(macho.symtab_command).?; - self.strtab = self.data[cmd.stroff..][0..cmd.strsize]; - - const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.data.ptr + cmd.symoff))[0..cmd.nsyms]; - try self.symtab.ensureUnusedCapacity(gpa, symtab.len); - for (symtab) |nlist| { - self.symtab.appendAssumeCapacity(.{ - .nlist = nlist, - .atom = 0, - .size = 0, - }); - } + const lc_buffer = try gpa.alloc(u8, self.header.?.sizeofcmds); + defer gpa.free(lc_buffer); + { + const amt = try self.file.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64)); + if (amt != self.header.?.sizeofcmds) return error.InputOutput; } + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = lc_buffer, + }; + while (it.next()) |lc| switch (lc.cmd()) { + .SEGMENT_64 => { + const sections = lc.getSections(); + try self.sections.ensureUnusedCapacity(gpa, sections.len); + for (sections) |sect| { + const index = try self.sections.addOne(gpa); + self.sections.set(index, .{ .header = sect }); + + if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + self.eh_frame_sect_index = @intCast(index); + } else if (mem.eql(u8, sect.sectName(), "__compact_unwind")) { + self.compact_unwind_sect_index = @intCast(index); + } + } + }, + .SYMTAB => { + const cmd = lc.cast(macho.symtab_command).?; + try self.strtab.resize(gpa, cmd.strsize); + { + const amt = try self.file.preadAll(self.strtab.items, cmd.stroff + offset); + if (amt != self.strtab.items.len) return error.InputOutput; + } + + const symtab_buffer = try gpa.alloc(u8, cmd.nsyms * @sizeOf(macho.nlist_64)); + defer gpa.free(symtab_buffer); + { + const amt = try self.file.preadAll(symtab_buffer, cmd.symoff + offset); + if (amt != symtab_buffer.len) return error.InputOutput; + } + const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(symtab_buffer.ptr))[0..cmd.nsyms]; + try self.symtab.ensureUnusedCapacity(gpa, symtab.len); + for (symtab) |nlist| { + self.symtab.appendAssumeCapacity(.{ + .nlist = nlist, + .atom = 0, + .size = 0, + }); + } + }, + .DATA_IN_CODE => { + const cmd = lc.cast(macho.linkedit_data_command).?; + const buffer = try gpa.alloc(u8, cmd.datasize); + defer gpa.free(buffer); + { + const amt = try self.file.preadAll(buffer, offset + cmd.dataoff); + if (amt != buffer.len) return error.InputOutput; + } + const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); + const dice = @as([*]align(1) const macho.data_in_code_entry, @ptrCast(buffer.ptr))[0..ndice]; + try self.data_in_code.appendUnalignedSlice(gpa, dice); + }, + .BUILD_VERSION, + .VERSION_MIN_MACOSX, + .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => if (self.platform == null) { + self.platform = MachO.Platform.fromLoadCommand(lc); + }, + else => {}, + }; + const NlistIdx = struct { nlist: macho.nlist_64, idx: usize, @@ -170,8 +228,6 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { try self.parseUnwindRecords(macho_file); } - self.initPlatform(); - if (self.platform) |platform| { if (!macho_file.platform.eqlTarget(platform)) { try macho_file.reportParseError2(self.index, "invalid platform: {}", .{ @@ -237,7 +293,7 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { defer gpa.free(name); const size = if (nlist_start == nlist_end) sect.size else nlists[nlist_start].nlist.n_value - sect.addr; const atom_index = try self.addAtom(.{ - .name = name, + .name = try self.addString(gpa, name), .n_sect = @intCast(n_sect), .off = 0, .size = size, @@ -267,7 +323,7 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { else sect.@"align"; const atom_index = try self.addAtom(.{ - .name = self.getString(nlist.nlist.n_strx), + .name = nlist.nlist.n_strx, .n_sect = @intCast(n_sect), .off = nlist.nlist.n_value - sect.addr, .size = size, @@ -300,7 +356,7 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { defer gpa.free(name); const atom_index = try self.addAtom(.{ - .name = name, + .name = try self.addString(gpa, name), .n_sect = @intCast(n_sect), .off = 0, .size = sect.size, @@ -336,7 +392,7 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { } const AddAtomArgs = struct { - name: [:0]const u8, + name: u32, n_sect: u8, off: u64, size: u64, @@ -349,7 +405,7 @@ fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index { const atom = macho_file.getAtom(atom_index).?; atom.file = self.index; atom.atom_index = atom_index; - atom.name = try macho_file.strings.insert(gpa, args.name); + atom.name = args.name; atom.n_sect = args.n_sect; atom.size = args.size; atom.alignment = Atom.Alignment.fromLog2Units(args.alignment); @@ -376,7 +432,7 @@ fn initLiteralSections(self: *Object, macho_file: *MachO) !void { defer gpa.free(name); const atom_index = try self.addAtom(.{ - .name = name, + .name = try self.addString(gpa, name), .n_sect = @intCast(n_sect), .off = 0, .size = sect.size, @@ -475,10 +531,9 @@ fn initSymbols(self: *Object, macho_file: *MachO) !void { const index = try macho_file.addSymbol(); self.symbols.appendAssumeCapacity(index); const symbol = macho_file.getSymbol(index); - const name = self.getString(nlist.n_strx); symbol.* = .{ .value = nlist.n_value, - .name = try macho_file.strings.insert(gpa, name), + .name = nlist.n_strx, .nlist_idx = @intCast(i), .atom = 0, .file = self.index, @@ -638,7 +693,10 @@ fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { const sect = slice.items(.header)[sect_id]; const relocs = slice.items(.relocs)[sect_id]; - const data = try self.getSectionData(sect_id); + // TODO: read into buffer directly + const data = try self.getSectionData(gpa, sect_id); + defer gpa.free(data); + try self.eh_frame_data.ensureTotalCapacityPrecise(gpa, data.len); self.eh_frame_data.appendSliceAssumeCapacity(data); @@ -739,7 +797,8 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { }; const gpa = macho_file.base.comp.gpa; - const data = try self.getSectionData(sect_id); + const data = try self.getSectionData(gpa, sect_id); + defer gpa.free(data); const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs]; const sym_lookup = SymbolLookup{ .ctx = self }; @@ -934,24 +993,6 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { } } -fn initPlatform(self: *Object) void { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - self.platform = while (it.next()) |cmd| { - switch (cmd.cmd()) { - .BUILD_VERSION, - .VERSION_MIN_MACOSX, - .VERSION_MIN_IPHONEOS, - .VERSION_MIN_TVOS, - .VERSION_MIN_WATCHOS, - => break MachO.Platform.fromLoadCommand(cmd), - else => {}, - } - } else null; -} - /// Currently, we only check if a compile unit for this input object file exists /// and record that so that we can emit symbol stabs. /// TODO in the future, we want parse debug info and debug line sections so that @@ -975,12 +1016,20 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { if (debug_info_index == null or debug_abbrev_index == null) return; - var dwarf_info = DwarfInfo{ - .debug_info = try self.getSectionData(@intCast(debug_info_index.?)), - .debug_abbrev = try self.getSectionData(@intCast(debug_abbrev_index.?)), - .debug_str = if (debug_str_index) |index| try self.getSectionData(@intCast(index)) else "", - }; - dwarf_info.init(gpa) catch { + const debug_info = try self.getSectionData(gpa, @intCast(debug_info_index.?)); + defer gpa.free(debug_info); + const debug_abbrev = try self.getSectionData(gpa, @intCast(debug_abbrev_index.?)); + defer gpa.free(debug_abbrev); + const debug_str = if (debug_str_index) |index| try self.getSectionData(gpa, @intCast(index)) else &[0]u8{}; + defer gpa.free(debug_str); + + var dwarf_info = DwarfInfo{}; + errdefer dwarf_info.deinit(gpa); + dwarf_info.init(gpa, .{ + .debug_info = debug_info, + .debug_abbrev = debug_abbrev, + .debug_str = debug_str, + }) catch { try macho_file.reportParseError2(self.index, "invalid __DWARF info found", .{}); return error.MalformedObject; }; @@ -1049,8 +1098,10 @@ pub fn resetGlobals(self: *Object, macho_file: *MachO) void { if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue; const sym = macho_file.getSymbol(sym_index); const name = sym.name; + const global = sym.flags.global; sym.* = .{}; sym.name = name; + sym.flags.global = global; } } @@ -1137,7 +1188,7 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try macho_file.strings.insert(gpa, name); + atom.name = try self.addString(gpa, name); atom.file = self.index; atom.size = nlist.n_value; atom.alignment = Atom.Alignment.fromLog2Units((nlist.n_desc >> 8) & 0x0f); @@ -1151,6 +1202,7 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { sym.value = 0; sym.atom = atom_index; + sym.flags.global = true; sym.flags.weak = false; sym.flags.weak_ref = false; sym.flags.tentative = false; @@ -1219,8 +1271,8 @@ pub fn calcStabsSize(self: *Object, macho_file: *MachO) error{Overflow}!void { self.output_symtab_ctx.strsize += @as(u32, @intCast(comp_dir.len + 1)); // comp_dir self.output_symtab_ctx.strsize += @as(u32, @intCast(tu_name.len + 1)); // tu_name - if (self.archive) |path| { - self.output_symtab_ctx.strsize += @as(u32, @intCast(path.len + 1 + self.path.len + 1 + 1)); + if (self.archive) |ar| { + self.output_symtab_ctx.strsize += @as(u32, @intCast(ar.path.len + 1 + self.path.len + 1 + 1)); } else { self.output_symtab_ctx.strsize += @as(u32, @intCast(self.path.len + 1)); } @@ -1365,8 +1417,8 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) error{Overflow}!void index += 1; // N_OSO path n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - if (self.archive) |path| { - macho_file.strtab.appendSliceAssumeCapacity(path); + if (self.archive) |ar| { + macho_file.strtab.appendSliceAssumeCapacity(ar.path); macho_file.strtab.appendAssumeCapacity('('); macho_file.strtab.appendSliceAssumeCapacity(self.path); macho_file.strtab.appendAssumeCapacity(')'); @@ -1532,30 +1584,25 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) error{Overflow}!void } } -fn getLoadCommand(self: Object, lc: macho.LC) ?LoadCommandIterator.LoadCommand { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - while (it.next()) |cmd| { - if (cmd.cmd() == lc) return cmd; - } else return null; -} - -pub fn getSectionData(self: *const Object, index: u32) error{Overflow}![]const u8 { +fn getSectionData(self: *const Object, allocator: Allocator, index: u32) ![]u8 { const slice = self.sections.slice(); assert(index < slice.items(.header).len); const sect = slice.items(.header)[index]; - const off = math.cast(usize, sect.offset) orelse return error.Overflow; - const size = math.cast(usize, sect.size) orelse return error.Overflow; - return self.data[off..][0..size]; + const offset = if (self.archive) |ar| ar.offset else 0; + const buffer = try allocator.alloc(u8, sect.size); + errdefer allocator.free(buffer); + const amt = try self.file.preadAll(buffer, sect.offset + offset); + if (amt != buffer.len) return error.InputOutput; + return buffer; } -pub fn getAtomData(self: *const Object, atom: Atom) error{Overflow}![]const u8 { - const data = try self.getSectionData(atom.n_sect); - const off = math.cast(usize, atom.off) orelse return error.Overflow; - const size = math.cast(usize, atom.size) orelse return error.Overflow; - return data[off..][0..size]; +pub fn getAtomData(self: *const Object, atom: Atom, buffer: []u8) !void { + assert(buffer.len == atom.size); + const slice = self.sections.slice(); + const offset = if (self.archive) |ar| ar.offset else 0; + const sect = slice.items(.header)[atom.n_sect]; + const amt = try self.file.preadAll(buffer, sect.offset + offset + atom.off); + if (amt != buffer.len) return error.InputOutput; } pub fn getAtomRelocs(self: *const Object, atom: Atom) []const Relocation { @@ -1563,9 +1610,17 @@ pub fn getAtomRelocs(self: *const Object, atom: Atom) []const Relocation { return relocs.items[atom.relocs.pos..][0..atom.relocs.len]; } -fn getString(self: Object, off: u32) [:0]const u8 { - assert(off < self.strtab.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0); +fn addString(self: *Object, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 { + const off: u32 = @intCast(self.strtab.items.len); + try self.strtab.ensureUnusedCapacity(allocator, name.len + 1); + self.strtab.appendSliceAssumeCapacity(name); + self.strtab.appendAssumeCapacity(0); + return off; +} + +pub fn getString(self: Object, off: u32) [:0]const u8 { + assert(off < self.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); } pub fn hasUnwindRecords(self: Object) bool { @@ -1600,15 +1655,8 @@ pub fn hasObjc(self: Object) bool { return false; } -pub fn getDataInCode(self: Object) []align(1) const macho.data_in_code_entry { - const lc = self.getLoadCommand(.DATA_IN_CODE) orelse return &[0]macho.data_in_code_entry{}; - const cmd = lc.cast(macho.linkedit_data_command).?; - const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); - const dice = @as( - [*]align(1) const macho.data_in_code_entry, - @ptrCast(self.data.ptr + cmd.dataoff), - )[0..ndice]; - return dice; +pub fn getDataInCode(self: Object) []const macho.data_in_code_entry { + return self.data_in_code.items; } pub inline fn hasSubsections(self: Object) bool { @@ -1762,8 +1810,8 @@ fn formatPath( ) !void { _ = unused_fmt_string; _ = options; - if (object.archive) |path| { - try writer.writeAll(path); + if (object.archive) |ar| { + try writer.writeAll(ar.path); try writer.writeByte('('); try writer.writeAll(object.path); try writer.writeByte(')'); @@ -1831,11 +1879,17 @@ const x86_64 = struct { ) !void { const gpa = macho_file.base.comp.gpa; - const relocs = @as( - [*]align(1) const macho.relocation_info, - @ptrCast(self.data.ptr + sect.reloff), - )[0..sect.nreloc]; - const code = try self.getSectionData(@intCast(n_sect)); + const offset = if (self.archive) |ar| ar.offset else 0; + const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); + defer gpa.free(relocs_buffer); + { + const amt = try self.file.preadAll(relocs_buffer, sect.reloff + offset); + if (amt != relocs_buffer.len) return error.InputOutput; + } + const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; + + const code = try self.getSectionData(gpa, @intCast(n_sect)); + defer gpa.free(code); try out.ensureTotalCapacityPrecise(gpa, relocs.len); @@ -1987,11 +2041,17 @@ const aarch64 = struct { ) !void { const gpa = macho_file.base.comp.gpa; - const relocs = @as( - [*]align(1) const macho.relocation_info, - @ptrCast(self.data.ptr + sect.reloff), - )[0..sect.nreloc]; - const code = try self.getSectionData(@intCast(n_sect)); + const offset = if (self.archive) |ar| ar.offset else 0; + const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); + defer gpa.free(relocs_buffer); + { + const amt = try self.file.preadAll(relocs_buffer, sect.reloff + offset); + if (amt != relocs_buffer.len) return error.InputOutput; + } + const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; + + const code = try self.getSectionData(gpa, @intCast(n_sect)); + defer gpa.free(code); try out.ensureTotalCapacityPrecise(gpa, relocs.len); diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index e8a8a561b7..a61e6f9579 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -55,7 +55,12 @@ pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool { } pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 { - return macho_file.strings.getAssumeExists(symbol.name); + if (symbol.flags.global) return macho_file.strings.getAssumeExists(symbol.name); + return switch (symbol.getFile(macho_file).?) { + .dylib => unreachable, // There are no local symbols for dylibs + .zig_object => |x| x.strtab.getAssumeExists(symbol.name), + inline else => |x| x.getString(symbol.name), + }; } pub fn getAtom(symbol: Symbol, macho_file: *MachO) ?*Atom { @@ -341,6 +346,11 @@ pub const Flags = packed struct { /// Whether the symbol is exported at runtime. @"export": bool = false, + /// Whether the symbol is effectively an extern and takes part in global + /// symbol resolution. Then, its name will be saved in global string interning + /// table. + global: bool = false, + /// Whether this symbol is weak. weak: bool = false, diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 6f55a077b5..604322eedf 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -3,6 +3,7 @@ path: []const u8, index: File.Index, symtab: std.MultiArrayList(Nlist) = .{}, +strtab: StringTable = .{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, @@ -52,10 +53,12 @@ pub fn init(self: *ZigObject, macho_file: *MachO) !void { const gpa = comp.gpa; try self.atoms.append(gpa, 0); // null input section + try self.strtab.buffer.append(gpa, 0); } pub fn deinit(self: *ZigObject, allocator: Allocator) void { self.symtab.deinit(allocator); + self.strtab.deinit(allocator); self.symbols.deinit(allocator); self.atoms.deinit(allocator); self.globals_lookup.deinit(allocator); @@ -136,37 +139,24 @@ pub fn addAtom(self: *ZigObject, macho_file: *MachO) !Symbol.Index { return symbol_index; } -/// Caller owns the memory. -pub fn getAtomDataAlloc( - self: ZigObject, - macho_file: *MachO, - allocator: Allocator, - atom: Atom, -) ![]u8 { +pub fn getAtomData(self: ZigObject, macho_file: *MachO, atom: Atom, buffer: []u8) !void { assert(atom.file == self.index); + assert(atom.size == buffer.len); const sect = macho_file.sections.items(.header)[atom.out_n_sect]; assert(!sect.isZerofill()); switch (sect.type()) { macho.S_THREAD_LOCAL_REGULAR => { const tlv = self.tlv_initializers.get(atom.atom_index).?; - const data = try allocator.dupe(u8, tlv.data); - return data; + @memcpy(buffer, tlv.data); }, macho.S_THREAD_LOCAL_VARIABLES => { - const size = std.math.cast(usize, atom.size) orelse return error.Overflow; - const data = try allocator.alloc(u8, size); - @memset(data, 0); - return data; + @memset(buffer, 0); }, else => { const file_offset = sect.offset + atom.value - sect.addr; - const size = std.math.cast(usize, atom.size) orelse return error.Overflow; - const data = try allocator.alloc(u8, size); - errdefer allocator.free(data); - const amt = try macho_file.base.file.?.preadAll(data, file_offset); - if (amt != data.len) return error.InputOutput; - return data; + const amt = try macho_file.base.file.?.preadAll(buffer, file_offset); + if (amt != buffer.len) return error.InputOutput; }, } } @@ -242,8 +232,10 @@ pub fn resetGlobals(self: *ZigObject, macho_file: *MachO) void { if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue; const sym = macho_file.getSymbol(sym_index); const name = sym.name; + const global = sym.flags.global; sym.* = .{}; sym.name = name; + sym.flags.global = global; } } @@ -686,7 +678,7 @@ fn updateDeclCode( sym.out_n_sect = sect_index; atom.out_n_sect = sect_index; - sym.name = try macho_file.strings.insert(gpa, decl_name); + sym.name = try self.strtab.insert(gpa, decl_name); atom.flags.alive = true; atom.name = sym.name; nlist.n_strx = sym.name; @@ -796,7 +788,7 @@ fn createTlvInitializer( atom.out_n_sect = sect_index; sym.value = 0; - sym.name = try macho_file.strings.insert(gpa, sym_name); + sym.name = try self.strtab.insert(gpa, sym_name); atom.flags.alive = true; atom.name = sym.name; nlist.n_strx = sym.name; @@ -849,7 +841,7 @@ fn createTlvDescriptor( atom.out_n_sect = sect_index; sym.value = 0; - sym.name = try macho_file.strings.insert(gpa, name); + sym.name = try self.strtab.insert(gpa, name); atom.flags.alive = true; atom.name = sym.name; nlist.n_strx = sym.name; @@ -1019,7 +1011,7 @@ fn lowerConst( }; const sym = macho_file.getSymbol(sym_index); - const name_str_index = try macho_file.strings.insert(gpa, name); + const name_str_index = try self.strtab.insert(gpa, name); sym.name = name_str_index; sym.out_n_sect = output_section_index; @@ -1110,7 +1102,7 @@ pub fn updateExports( } const exp_name = mod.intern_pool.stringToSlice(exp.opts.name); - const global_nlist_index = if (metadata.@"export"(self, macho_file, exp_name)) |exp_index| + const global_nlist_index = if (metadata.@"export"(self, exp_name)) |exp_index| exp_index.* else blk: { const global_nlist_index = try self.getGlobalSymbol(macho_file, exp_name, null); @@ -1159,7 +1151,7 @@ fn updateLazySymbol( lazy_sym.ty.fmt(mod), }); defer gpa.free(name); - break :blk try macho_file.strings.insert(gpa, name); + break :blk try self.strtab.insert(gpa, name); }; const src = if (lazy_sym.ty.getOwnerDeclOrNull(mod)) |owner_decl| @@ -1247,7 +1239,7 @@ pub fn deleteDeclExport( const mod = macho_file.base.comp.module.?; const exp_name = mod.intern_pool.stringToSlice(name); - const nlist_index = metadata.@"export"(self, macho_file, exp_name) orelse return; + const nlist_index = metadata.@"export"(self, exp_name) orelse return; log.debug("deleting export '{s}'", .{exp_name}); @@ -1268,7 +1260,7 @@ pub fn getGlobalSymbol(self: *ZigObject, macho_file: *MachO, name: []const u8, l const gpa = macho_file.base.comp.gpa; const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); defer gpa.free(sym_name); - const off = try macho_file.strings.insert(gpa, sym_name); + const off = try self.strtab.insert(gpa, sym_name); const lookup_gop = try self.globals_lookup.getOrPut(gpa, off); if (!lookup_gop.found_existing) { const nlist_index = try self.addNlist(gpa); @@ -1406,10 +1398,10 @@ const DeclMetadata = struct { /// A list of all exports aliases of this Decl. exports: std.ArrayListUnmanaged(Symbol.Index) = .{}, - fn @"export"(m: DeclMetadata, zig_object: *ZigObject, macho_file: *MachO, name: []const u8) ?*u32 { + fn @"export"(m: DeclMetadata, zig_object: *ZigObject, name: []const u8) ?*u32 { for (m.exports.items) |*exp| { const nlist = zig_object.symtab.items(.nlist)[exp.*]; - const exp_name = macho_file.strings.getAssumeExists(nlist.n_strx); + const exp_name = zig_object.strtab.getAssumeExists(nlist.n_strx); if (mem.eql(u8, name, exp_name)) return exp; } return null; diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index 1bcbe1f3ab..0b3df180a5 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -290,8 +290,7 @@ fn writeAtoms(macho_file: *MachO) !void { assert(atom.flags.alive); const off = math.cast(usize, atom.value - header.addr) orelse return error.Overflow; const atom_size = math.cast(usize, atom.size) orelse return error.Overflow; - const atom_data = try atom.getFile(macho_file).object.getAtomData(atom.*); - @memcpy(code[off..][0..atom_size], atom_data); + try atom.getFile(macho_file).object.getAtomData(atom.*, code[off..][0..atom_size]); try atom.writeRelocs(macho_file, code[off..][0..atom_size], &relocs); } |
