From 702bcfecf5732eceada9bfbca0804c706c238d49 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 19 Aug 2023 09:07:33 +0200 Subject: macho: simplify input file parsing for both drivers --- src/link/MachO.zig | 503 +++++++++++++++++++++++++++++++++------------ src/link/MachO/Archive.zig | 36 +--- src/link/MachO/Dylib.zig | 33 ++- src/link/MachO/Object.zig | 41 +--- src/link/MachO/fat.zig | 48 +++-- src/link/MachO/zld.zig | 382 +++++----------------------------- 6 files changed, 489 insertions(+), 554 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8125c27e80..92aae67c22 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -111,6 +111,8 @@ dysymtab_cmd: macho.dysymtab_command = .{}, uuid_cmd: macho.uuid_command = .{}, codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, +objects: std.ArrayListUnmanaged(Object) = .{}, +archives: std.ArrayListUnmanaged(Archive) = .{}, dylibs: std.ArrayListUnmanaged(Dylib) = .{}, dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, @@ -586,8 +588,30 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No parent: u16, }, .Dynamic).init(arena); - try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); - try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); + for (libs.keys(), libs.values()) |path, lib| { + const in_file = try std.fs.cwd().openFile(path, .{}); + defer in_file.close(); + + parseLibrary( + self, + self.base.allocator, + in_file, + path, + lib, + false, + &dependent_libs, + &self.base.options, + ) catch |err| { + // TODO convert to error + log.err("{s}: parsing library failed with err {s}", .{ path, @errorName(err) }); + continue; + }; + } + + parseDependentLibs(self, self.base.allocator, &dependent_libs, &self.base.options) catch |err| { + // TODO convert to error + log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); + }; } if (self.dyld_stub_binder_index == null) { @@ -880,175 +904,373 @@ fn resolveLib( return full_path; } -const ParseDylibError = error{ - OutOfMemory, - EmptyStubFile, - MismatchedCpuArchitecture, - UnsupportedCpuArchitecture, - EndOfStream, -} || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; +pub fn parsePositional( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, + path: []const u8, + must_link: bool, + dependent_libs: anytype, + link_options: *const link.Options, +) !void { + const tracy = trace(@src()); + defer tracy.end(); -const DylibCreateOpts = struct { - syslibroot: ?[]const u8, + if (Object.isObject(file)) { + try parseObject(ctx, gpa, file, path, link_options); + } else { + try parseLibrary(ctx, gpa, file, path, .{ + .path = null, + .needed = false, + .weak = false, + }, must_link, dependent_libs, link_options); + } +} + +fn parseObject( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, + path: []const u8, + link_options: *const link.Options, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const mtime: u64 = mtime: { + const stat = file.stat() catch break :mtime 0; + break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); + }; + const file_stat = try file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + + var object = Object{ + .name = try gpa.dupe(u8, path), + .mtime = mtime, + .contents = contents, + }; + errdefer object.deinit(gpa); + try object.parse(gpa); + try ctx.objects.append(gpa, object); + + const cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => unreachable, + }; + const self_cpu_arch = link_options.target.cpu.arch; + + if (self_cpu_arch != cpu_arch) { + // TODO convert into an error + log.err("{s}: invalid architecture '{s}', expected '{s}'", .{ + path, + @tagName(cpu_arch), + @tagName(self_cpu_arch), + }); + } +} + +pub fn parseLibrary( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, + path: []const u8, + lib: link.SystemLib, + must_link: bool, + dependent_libs: anytype, + link_options: *const link.Options, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const cpu_arch = link_options.target.cpu.arch; + + if (fat.isFatLibrary(file)) { + const offset = parseFatLibrary(ctx, file, path, cpu_arch) catch |err| switch (err) { + error.MissingArch => return, + else => |e| return e, + }; + try file.seekTo(offset); + + if (Archive.isArchive(file, offset)) { + try parseArchive(ctx, gpa, path, offset, must_link, cpu_arch); + } else if (Dylib.isDylib(file, offset)) { + try parseDylib(ctx, gpa, file, path, offset, dependent_libs, link_options, .{ + .needed = lib.needed, + .weak = lib.weak, + }); + } else { + // TODO convert into an error + log.err("{s}: unknown file type", .{path}); + return; + } + } else if (Archive.isArchive(file, 0)) { + try parseArchive(ctx, gpa, path, 0, must_link, cpu_arch); + } else if (Dylib.isDylib(file, 0)) { + try parseDylib(ctx, gpa, file, path, 0, dependent_libs, link_options, .{ + .needed = lib.needed, + .weak = lib.weak, + }); + } else { + parseLibStub(ctx, gpa, file, path, dependent_libs, link_options, .{ + .needed = lib.needed, + .weak = lib.weak, + }) catch |err| switch (err) { + error.NotLibStub, error.UnexpectedToken => { + // TODO convert into an error + log.err("{s}: unknown file type", .{path}); + return; + }, + else => |e| return e, + }; + } +} + +pub fn parseFatLibrary( + ctx: anytype, + file: std.fs.File, + path: []const u8, + cpu_arch: std.Target.Cpu.Arch, +) !u64 { + _ = ctx; + var buffer: [2]fat.Arch = undefined; + const fat_archs = try fat.parseArchs(file, &buffer); + const offset = for (fat_archs) |arch| { + if (arch.tag == cpu_arch) break arch.offset; + } else { + // TODO convert into an error + log.err("{s}: missing arch in universal file: expected {s}", .{ path, @tagName(cpu_arch) }); + return error.MissingArch; + }; + return offset; +} + +fn parseArchive( + ctx: anytype, + gpa: Allocator, + path: []const u8, + fat_offset: u64, + must_link: bool, + cpu_arch: std.Target.Cpu.Arch, +) !void { + + // We take ownership of the file so that we can store it for the duration of symbol resolution. + // TODO we shouldn't need to do that and could pre-parse the archive like we do for zld/ELF? + const file = try std.fs.cwd().openFile(path, .{}); + errdefer file.close(); + try file.seekTo(fat_offset); + + var archive = Archive{ + .file = file, + .fat_offset = fat_offset, + .name = try gpa.dupe(u8, path), + }; + errdefer archive.deinit(gpa); + + try archive.parse(gpa, file.reader()); + + // Verify arch and platform + if (archive.toc.values().len > 0) { + const offsets = archive.toc.values()[0].items; + assert(offsets.len > 0); + const off = offsets[0]; + var object = try archive.parseObject(gpa, off); // TODO we are doing all this work to pull the header only! + defer object.deinit(gpa); + + const parsed_cpu_arch: std.Target.Cpu.Arch = switch (object.header.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => unreachable, + }; + if (cpu_arch != parsed_cpu_arch) { + // TODO convert into an error + log.err("{s}: invalid architecture in archive '{s}', expected '{s}'", .{ + path, + @tagName(parsed_cpu_arch), + @tagName(cpu_arch), + }); + return error.MissingArch; + } + } + + if (must_link) { + // Get all offsets from the ToC + var offsets = std.AutoArrayHashMap(u32, void).init(gpa); + defer offsets.deinit(); + for (archive.toc.values()) |offs| { + for (offs.items) |off| { + _ = try offsets.getOrPut(off); + } + } + for (offsets.keys()) |off| { + const object = try archive.parseObject(gpa, off); + try ctx.objects.append(gpa, object); + } + } else { + try ctx.archives.append(gpa, archive); + } +} + +const DylibOpts = struct { id: ?Dylib.Id = null, dependent: bool = false, needed: bool = false, weak: bool = false, }; -pub fn parseDylib( - self: *MachO, +fn parseDylib( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, path: []const u8, + offset: u64, dependent_libs: anytype, - opts: DylibCreateOpts, -) ParseDylibError!bool { - const gpa = self.base.allocator; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - defer file.close(); + link_options: *const link.Options, + dylib_options: DylibOpts, +) !void { + const self_cpu_arch = link_options.target.cpu.arch; - const cpu_arch = self.base.options.target.cpu.arch; const file_stat = try file.stat(); var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - const reader = file.reader(); - const fat_offset = math.cast(usize, try fat.getLibraryOffset(reader, cpu_arch)) orelse - return error.Overflow; - try file.seekTo(fat_offset); - file_size -= fat_offset; + file_size -= offset; const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); defer gpa.free(contents); - const dylib_id = @as(u16, @intCast(self.dylibs.items.len)); - var dylib = Dylib{ .weak = opts.weak }; + var dylib = Dylib{ .weak = dylib_options.weak }; + errdefer dylib.deinit(gpa); - dylib.parseFromBinary( + try dylib.parseFromBinary( gpa, - cpu_arch, - dylib_id, + @intCast(ctx.dylibs.items.len), // TODO defer it till later dependent_libs, path, contents, - ) catch |err| switch (err) { - error.EndOfStream, error.NotDylib => { - try file.seekTo(0); + ); - var lib_stub = LibStub.loadFromFile(gpa, file) catch { - dylib.deinit(gpa); - return false; - }; - defer lib_stub.deinit(); - - try dylib.parseFromStub( - gpa, - self.base.options.target, - lib_stub, - dylib_id, - dependent_libs, - path, - ); - }, + const cpu_arch: std.Target.Cpu.Arch = switch (dylib.header.?.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => unreachable, + }; + if (self_cpu_arch != cpu_arch) { + // TODO convert into an error + log.err("{s}: invalid architecture '{s}', expected '{s}'", .{ + path, + @tagName(cpu_arch), + @tagName(self_cpu_arch), + }); + return error.MissingArch; + } + + // TODO verify platform + + addDylib(ctx, gpa, dylib, link_options, .{ + .needed = dylib_options.needed, + .weak = dylib_options.weak, + }) catch |err| switch (err) { + error.DylibAlreadyExists => dylib.deinit(gpa), else => |e| return e, }; +} - if (opts.id) |id| { +fn parseLibStub( + ctx: anytype, + gpa: Allocator, + file: std.fs.File, + path: []const u8, + dependent_libs: anytype, + link_options: *const link.Options, + dylib_options: DylibOpts, +) !void { + var lib_stub = try LibStub.loadFromFile(gpa, file); + defer lib_stub.deinit(); + + if (lib_stub.inner.len == 0) return error.NotLibStub; + + // TODO verify platform + + var dylib = Dylib{ .weak = dylib_options.weak }; + errdefer dylib.deinit(gpa); + + try dylib.parseFromStub( + gpa, + link_options.target, + lib_stub, + @intCast(ctx.dylibs.items.len), // TODO defer it till later + dependent_libs, + path, + ); + + addDylib(ctx, gpa, dylib, link_options, .{ + .needed = dylib_options.needed, + .weak = dylib_options.weak, + }) catch |err| switch (err) { + error.DylibAlreadyExists => dylib.deinit(gpa), + else => |e| return e, + }; +} + +fn addDylib( + ctx: anytype, + gpa: Allocator, + dylib: Dylib, + link_options: *const link.Options, + dylib_options: DylibOpts, +) !void { + if (dylib_options.id) |id| { if (dylib.id.?.current_version < id.compatibility_version) { + // TODO convert into an error log.warn("found dylib is incompatible with the required minimum version", .{}); log.warn(" dylib: {s}", .{id.name}); log.warn(" required minimum version: {}", .{id.compatibility_version}); log.warn(" dylib version: {}", .{dylib.id.?.current_version}); - - // TODO maybe this should be an error and facilitate auto-cleanup? - dylib.deinit(gpa); - return false; + return error.IncompatibleDylibVersion; } } - try self.dylibs.append(gpa, dylib); - try self.dylibs_map.putNoClobber(gpa, dylib.id.?.name, dylib_id); + const gop = try ctx.dylibs_map.getOrPut(gpa, dylib.id.?.name); + if (gop.found_existing) return error.DylibAlreadyExists; + + gop.value_ptr.* = @as(u16, @intCast(ctx.dylibs.items.len)); + try ctx.dylibs.append(gpa, dylib); const should_link_dylib_even_if_unreachable = blk: { - if (self.base.options.dead_strip_dylibs and !opts.needed) break :blk false; - break :blk !(opts.dependent or self.referenced_dylibs.contains(dylib_id)); + if (link_options.dead_strip_dylibs and !dylib_options.needed) break :blk false; + break :blk !(dylib_options.dependent or ctx.referenced_dylibs.contains(gop.value_ptr.*)); }; if (should_link_dylib_even_if_unreachable) { - try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); + try ctx.referenced_dylibs.putNoClobber(gpa, gop.value_ptr.*, {}); } - - return true; } -pub fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const u8, dependent_libs: anytype) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - break :full_path try fs.realpath(file_name, &buffer); - }; - log.debug("parsing input file path '{s}'", .{full_path}); - - if (try self.parseObject(full_path)) continue; - if (try self.parseArchive(full_path, false)) continue; - if (try self.parseDylib(full_path, dependent_libs, .{ - .syslibroot = syslibroot, - })) continue; - - log.debug("unknown filetype for positional input file: '{s}'", .{file_name}); - } -} - -pub fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - break :full_path try fs.realpath(file_name, &buffer); - }; - log.debug("parsing and force loading static archive '{s}'", .{full_path}); - - if (try self.parseArchive(full_path, true)) continue; - log.debug("unknown filetype: expected static archive: '{s}'", .{file_name}); - } -} - -pub fn parseLibs( - self: *MachO, - lib_names: []const []const u8, - lib_infos: []const link.SystemLib, - syslibroot: ?[]const u8, +pub fn parseDependentLibs( + ctx: anytype, + gpa: Allocator, dependent_libs: anytype, + link_options: *const link.Options, ) !void { - for (lib_names, 0..) |lib, i| { - const lib_info = lib_infos[i]; - log.debug("parsing lib path '{s}'", .{lib}); - if (try self.parseDylib(lib, dependent_libs, .{ - .syslibroot = syslibroot, - .needed = lib_info.needed, - .weak = lib_info.weak, - })) continue; - - log.debug("unknown filetype for a library: '{s}'", .{lib}); - } -} + const tracy = trace(@src()); + defer tracy.end(); -pub fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: anytype) !void { // At this point, we can now parse dependents of dylibs preserving the inclusion order of: // 1) anything on the linker line is parsed first // 2) afterwards, we parse dependents of the included dylibs // TODO this should not be performed if the user specifies `-flat_namespace` flag. // See ld64 manpages. - var arena_alloc = std.heap.ArenaAllocator.init(self.base.allocator); + var arena_alloc = std.heap.ArenaAllocator.init(gpa); const arena = arena_alloc.allocator(); defer arena_alloc.deinit(); - while (dependent_libs.readItem()) |*dep_id| { - defer dep_id.id.deinit(self.base.allocator); + outer: while (dependent_libs.readItem()) |dep_id| { + defer dep_id.id.deinit(gpa); - if (self.dylibs_map.contains(dep_id.id.name)) continue; + if (ctx.dylibs_map.contains(dep_id.id.name)) continue; - const weak = self.dylibs.items[dep_id.parent].weak; + const weak = ctx.dylibs.items[dep_id.parent].weak; const has_ext = blk: { const basename = fs.path.basename(dep_id.id.name); break :blk mem.lastIndexOfScalar(u8, basename, '.') != null; @@ -1061,20 +1283,47 @@ pub fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: for (&[_][]const u8{ extension, ".tbd" }) |ext| { const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ without_ext, ext }); - const full_path = if (syslibroot) |root| try fs.path.join(arena, &.{ root, with_ext }) else with_ext; + const full_path = if (link_options.sysroot) |root| + try fs.path.join(arena, &.{ root, with_ext }) + else + with_ext; + + const file = std.fs.cwd().openFile(full_path, .{}) catch |err| switch (err) { + error.FileNotFound => continue, + else => |e| return e, + }; + defer file.close(); log.debug("trying dependency at fully resolved path {s}", .{full_path}); - const did_parse_successfully = try self.parseDylib(full_path, dependent_libs, .{ - .id = dep_id.id, - .syslibroot = syslibroot, - .dependent = true, - .weak = weak, - }); - if (did_parse_successfully) break; - } else { - log.debug("unable to resolve dependency {s}", .{dep_id.id.name}); + const offset: u64 = if (fat.isFatLibrary(file)) blk: { + const offset = parseFatLibrary(ctx, file, full_path, link_options.target.cpu.arch) catch |err| switch (err) { + error.MissingArch => break, + else => |e| return e, + }; + try file.seekTo(offset); + break :blk offset; + } else 0; + + if (Dylib.isDylib(file, offset)) { + try parseDylib(ctx, gpa, file, full_path, offset, dependent_libs, link_options, .{ + .dependent = true, + .weak = weak, + }); + } else { + parseLibStub(ctx, gpa, file, full_path, dependent_libs, link_options, .{ + .dependent = true, + .weak = weak, + }) catch |err| switch (err) { + error.NotLibStub, error.UnexpectedToken => continue, + else => |e| return e, + }; + } + continue :outer; } + + // TODO convert into an error + log.err("{s}: unable to resolve dependency", .{dep_id.id.name}); } } @@ -2517,7 +2766,7 @@ fn populateMissingMetadata(self: *MachO) !void { // The first __TEXT segment is immovable and covers MachO header and load commands. self.header_segment_cmd_index = @as(u8, @intCast(self.segments.items.len)); const ideal_size = @max(self.base.options.headerpad_size orelse 0, default_headerpad_size); - const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), getPageSize(self.base.options.target.cpu.arch)); + const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), getPageSize(cpu_arch)); log.debug("found __TEXT segment (header-only) free space 0x{x} to 0x{x}", .{ 0, needed_size }); diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 5276bf041e..f3922f6ff9 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -87,6 +87,13 @@ const ar_hdr = extern struct { } }; +pub fn isArchive(file: fs.File, fat_offset: u64) bool { + const reader = file.reader(); + const magic = reader.readBytesNoEof(SARMAG) catch return false; + defer file.seekTo(fat_offset) catch {}; + return mem.eql(u8, &magic, ARMAG); +} + pub fn deinit(self: *Archive, allocator: Allocator) void { self.file.close(); for (self.toc.keys()) |*key| { @@ -100,21 +107,8 @@ pub fn deinit(self: *Archive, allocator: Allocator) void { } pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { - const magic = try reader.readBytesNoEof(SARMAG); - if (!mem.eql(u8, &magic, ARMAG)) { - log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic }); - return error.NotArchive; - } - + _ = try reader.readBytesNoEof(SARMAG); self.header = try reader.readStruct(ar_hdr); - if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) { - log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ - ARFMAG, - self.header.ar_fmag, - }); - return error.NotArchive; - } - const name_or_length = try self.header.nameOrLength(); var embedded_name = try parseName(allocator, name_or_length, reader); log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name }); @@ -182,22 +176,12 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! } } -pub fn parseObject( - self: Archive, - gpa: Allocator, - cpu_arch: std.Target.Cpu.Arch, - offset: u32, -) !Object { +pub fn parseObject(self: Archive, gpa: Allocator, offset: u32) !Object { const reader = self.file.reader(); try reader.context.seekTo(self.fat_offset + offset); const object_header = try reader.readStruct(ar_hdr); - if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG)) { - log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, object_header.ar_fmag }); - return error.MalformedArchive; - } - const name_or_length = try object_header.nameOrLength(); const object_name = try parseName(gpa, name_or_length, reader); defer gpa.free(object_name); @@ -227,7 +211,7 @@ pub fn parseObject( .contents = contents, }; - try object.parse(gpa, cpu_arch); + try object.parse(gpa); return object; } diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 2aacf4009b..c424343a4e 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -20,6 +20,8 @@ const Tbd = tapi.Tbd; id: ?Id = null, weak: bool = false, +/// Header is only set if Dylib is parsed directly from a binary and not a stub file. +header: ?macho.mach_header_64 = null, /// Parsed symbol table represented as hash map of symbols' /// names. We can and should defer creating *Symbols until @@ -116,6 +118,13 @@ pub const Id = struct { } }; +pub fn isDylib(file: std.fs.File, fat_offset: u64) bool { + const reader = file.reader(); + const hdr = reader.readStruct(macho.mach_header_64) catch return false; + defer file.seekTo(fat_offset) catch {}; + return hdr.filetype == macho.MH_DYLIB; +} + pub fn deinit(self: *Dylib, allocator: Allocator) void { for (self.symbols.keys()) |key| { allocator.free(key); @@ -129,7 +138,6 @@ pub fn deinit(self: *Dylib, allocator: Allocator) void { pub fn parseFromBinary( self: *Dylib, allocator: Allocator, - cpu_arch: std.Target.Cpu.Arch, dylib_id: u16, dependent_libs: anytype, name: []const u8, @@ -140,27 +148,12 @@ pub fn parseFromBinary( log.debug("parsing shared library '{s}'", .{name}); - const header = try reader.readStruct(macho.mach_header_64); - - if (header.filetype != macho.MH_DYLIB) { - log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, header.filetype }); - return error.NotDylib; - } - - const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(header.cputype, true); - - if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {s}, found {s}", .{ - @tagName(cpu_arch), - @tagName(this_arch), - }); - return error.MismatchedCpuArchitecture; - } + self.header = try reader.readStruct(macho.mach_header_64); - const should_lookup_reexports = header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; + const should_lookup_reexports = self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; var it = LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = data[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + .ncmds = self.header.?.ncmds, + .buffer = data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], }; while (it.next()) |cmd| { switch (cmd.cmd()) { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index bbcfbb7047..fe517d11be 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -91,6 +91,13 @@ const Record = struct { reloc: Entry, }; +pub fn isObject(file: std.fs.File) bool { + const reader = file.reader(); + const hdr = reader.readStruct(macho.mach_header_64) catch return false; + defer file.seekTo(0) catch {}; + return hdr.filetype == macho.MH_OBJECT; +} + pub fn deinit(self: *Object, gpa: Allocator) void { self.atoms.deinit(gpa); self.exec_atoms.deinit(gpa); @@ -118,36 +125,12 @@ pub fn deinit(self: *Object, gpa: Allocator) void { self.data_in_code.deinit(gpa); } -pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { +pub fn parse(self: *Object, allocator: Allocator) !void { var stream = std.io.fixedBufferStream(self.contents); const reader = stream.reader(); self.header = try reader.readStruct(macho.mach_header_64); - if (self.header.filetype != macho.MH_OBJECT) { - log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ - macho.MH_OBJECT, - self.header.filetype, - }); - return error.NotObject; - } - - const this_arch: std.Target.Cpu.Arch = switch (self.header.cputype) { - macho.CPU_TYPE_ARM64 => .aarch64, - macho.CPU_TYPE_X86_64 => .x86_64, - else => |value| { - log.err("unsupported cpu architecture 0x{x}", .{value}); - return error.UnsupportedCpuArchitecture; - }, - }; - if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {s}, found {s}", .{ - @tagName(cpu_arch), - @tagName(this_arch), - }); - return error.MismatchedCpuArchitecture; - } - var it = LoadCommandIterator{ .ncmds = self.header.ncmds, .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], @@ -437,7 +420,7 @@ pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we // have to infer the start of undef section in the symtab ourselves. const iundefsym = blk: { - const dysymtab = self.parseDysymtab() orelse { + const dysymtab = self.getDysymtab() orelse { var iundefsym: usize = self.in_symtab.?.len; while (iundefsym > 0) : (iundefsym -= 1) { const sym = self.symtab[iundefsym - 1]; @@ -945,16 +928,14 @@ fn diceLessThan(ctx: void, lhs: macho.data_in_code_entry, rhs: macho.data_in_cod return lhs.offset < rhs.offset; } -fn parseDysymtab(self: Object) ?macho.dysymtab_command { +fn getDysymtab(self: Object) ?macho.dysymtab_command { var it = LoadCommandIterator{ .ncmds = self.header.ncmds, .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], }; while (it.next()) |cmd| { switch (cmd.cmd()) { - .DYSYMTAB => { - return cmd.cast(macho.dysymtab_command).?; - }, + .DYSYMTAB => return cmd.cast(macho.dysymtab_command).?, else => {}, } } else return null; diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index 81b0685418..751e49f651 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -1,42 +1,40 @@ const std = @import("std"); +const assert = std.debug.assert; const log = std.log.scoped(.archive); const macho = std.macho; const mem = std.mem; -pub fn decodeArch(cputype: macho.cpu_type_t, comptime logError: bool) !std.Target.Cpu.Arch { - const cpu_arch: std.Target.Cpu.Arch = switch (cputype) { - macho.CPU_TYPE_ARM64 => .aarch64, - macho.CPU_TYPE_X86_64 => .x86_64, - else => { - if (logError) { - log.err("unsupported cpu architecture 0x{x}", .{cputype}); - } - return error.UnsupportedCpuArchitecture; - }, - }; - return cpu_arch; +pub fn isFatLibrary(file: std.fs.File) bool { + const reader = file.reader(); + const hdr = reader.readStructBig(macho.fat_header) catch return false; + defer file.seekTo(0) catch {}; + return hdr.magic == macho.FAT_MAGIC; } -pub fn getLibraryOffset(reader: anytype, cpu_arch: std.Target.Cpu.Arch) !u64 { +pub const Arch = struct { + tag: std.Target.Cpu.Arch, + offset: u64, +}; + +pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { + const reader = file.reader(); const fat_header = try reader.readStructBig(macho.fat_header); - if (fat_header.magic != macho.FAT_MAGIC) return 0; + assert(fat_header.magic == macho.FAT_MAGIC); + var count: usize = 0; var fat_arch_index: u32 = 0; while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) { const fat_arch = try reader.readStructBig(macho.fat_arch); // If we come across an architecture that we do not know how to handle, that's // fine because we can keep looking for one that might match. - const lib_arch = decodeArch(fat_arch.cputype, false) catch |err| switch (err) { - error.UnsupportedCpuArchitecture => continue, + const arch: std.Target.Cpu.Arch = switch (fat_arch.cputype) { + macho.CPU_TYPE_ARM64 => if (fat_arch.cpusubtype == macho.CPU_SUBTYPE_ARM_ALL) .aarch64 else continue, + macho.CPU_TYPE_X86_64 => if (fat_arch.cpusubtype == macho.CPU_SUBTYPE_X86_64_ALL) .x86_64 else continue, + else => continue, }; - if (lib_arch == cpu_arch) { - // We have found a matching architecture! - return fat_arch.offset; - } - } else { - log.err("Could not find matching cpu architecture in fat library: expected {s}", .{ - @tagName(cpu_arch), - }); - return error.MismatchedCpuArchitecture; + buffer[count] = .{ .tag = arch, .offset = fat_arch.offset }; + count += 1; } + + return buffer[0..count]; } diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index bfadd064db..6ca227c430 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -89,298 +89,6 @@ pub const Zld = struct { atoms: std.ArrayListUnmanaged(Atom) = .{}, - fn parseObject(self: *Zld, path: []const u8) !bool { - const gpa = self.gpa; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - defer file.close(); - - const name = try gpa.dupe(u8, path); - errdefer gpa.free(name); - const cpu_arch = self.options.target.cpu.arch; - const mtime: u64 = mtime: { - const stat = file.stat() catch break :mtime 0; - break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); - }; - const file_stat = try file.stat(); - const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); - - var object = Object{ - .name = name, - .mtime = mtime, - .contents = contents, - }; - - object.parse(gpa, cpu_arch) catch |err| switch (err) { - error.EndOfStream, error.NotObject => { - object.deinit(gpa); - return false; - }, - else => |e| return e, - }; - - try self.objects.append(gpa, object); - - return true; - } - - fn parseArchive(self: *Zld, path: []const u8, force_load: bool) !bool { - const gpa = self.gpa; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - errdefer file.close(); - - const name = try gpa.dupe(u8, path); - errdefer gpa.free(name); - const cpu_arch = self.options.target.cpu.arch; - const reader = file.reader(); - const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); - try reader.context.seekTo(fat_offset); - - var archive = Archive{ - .name = name, - .fat_offset = fat_offset, - .file = file, - }; - - archive.parse(gpa, reader) catch |err| switch (err) { - error.EndOfStream, error.NotArchive => { - archive.deinit(gpa); - return false; - }, - else => |e| return e, - }; - - if (force_load) { - defer archive.deinit(gpa); - // Get all offsets from the ToC - var offsets = std.AutoArrayHashMap(u32, void).init(gpa); - defer offsets.deinit(); - for (archive.toc.values()) |offs| { - for (offs.items) |off| { - _ = try offsets.getOrPut(off); - } - } - for (offsets.keys()) |off| { - const object = try archive.parseObject(gpa, cpu_arch, off); - try self.objects.append(gpa, object); - } - } else { - try self.archives.append(gpa, archive); - } - - return true; - } - - const ParseDylibError = error{ - OutOfMemory, - EmptyStubFile, - MismatchedCpuArchitecture, - UnsupportedCpuArchitecture, - EndOfStream, - } || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; - - const DylibCreateOpts = struct { - syslibroot: ?[]const u8, - id: ?Dylib.Id = null, - dependent: bool = false, - needed: bool = false, - weak: bool = false, - }; - - fn parseDylib( - self: *Zld, - path: []const u8, - dependent_libs: anytype, - opts: DylibCreateOpts, - ) ParseDylibError!bool { - const gpa = self.gpa; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - defer file.close(); - - const cpu_arch = self.options.target.cpu.arch; - const file_stat = try file.stat(); - var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - - const reader = file.reader(); - const fat_offset = math.cast(usize, try fat.getLibraryOffset(reader, cpu_arch)) orelse - return error.Overflow; - try file.seekTo(fat_offset); - file_size -= fat_offset; - - const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); - defer gpa.free(contents); - - const dylib_id = @as(u16, @intCast(self.dylibs.items.len)); - var dylib = Dylib{ .weak = opts.weak }; - - dylib.parseFromBinary( - gpa, - cpu_arch, - dylib_id, - dependent_libs, - path, - contents, - ) catch |err| switch (err) { - error.EndOfStream, error.NotDylib => { - try file.seekTo(0); - - var lib_stub = LibStub.loadFromFile(gpa, file) catch { - dylib.deinit(gpa); - return false; - }; - defer lib_stub.deinit(); - - try dylib.parseFromStub( - gpa, - self.options.target, - lib_stub, - dylib_id, - dependent_libs, - path, - ); - }, - else => |e| return e, - }; - - if (opts.id) |id| { - if (dylib.id.?.current_version < id.compatibility_version) { - log.warn("found dylib is incompatible with the required minimum version", .{}); - log.warn(" dylib: {s}", .{id.name}); - log.warn(" required minimum version: {}", .{id.compatibility_version}); - log.warn(" dylib version: {}", .{dylib.id.?.current_version}); - - // TODO maybe this should be an error and facilitate auto-cleanup? - dylib.deinit(gpa); - return false; - } - } - - try self.dylibs.append(gpa, dylib); - try self.dylibs_map.putNoClobber(gpa, dylib.id.?.name, dylib_id); - - const should_link_dylib_even_if_unreachable = blk: { - if (self.options.dead_strip_dylibs and !opts.needed) break :blk false; - break :blk !(opts.dependent or self.referenced_dylibs.contains(dylib_id)); - }; - - if (should_link_dylib_even_if_unreachable) { - try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); - } - - return true; - } - - fn parseInputFiles( - self: *Zld, - files: []const []const u8, - syslibroot: ?[]const u8, - dependent_libs: anytype, - ) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - break :full_path try fs.realpath(file_name, &buffer); - }; - log.debug("parsing input file path '{s}'", .{full_path}); - - if (try self.parseObject(full_path)) continue; - if (try self.parseArchive(full_path, false)) continue; - if (try self.parseDylib(full_path, dependent_libs, .{ - .syslibroot = syslibroot, - })) continue; - - log.debug("unknown filetype for positional input file: '{s}'", .{file_name}); - } - } - - fn parseAndForceLoadStaticArchives(self: *Zld, files: []const []const u8) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - break :full_path try fs.realpath(file_name, &buffer); - }; - log.debug("parsing and force loading static archive '{s}'", .{full_path}); - - if (try self.parseArchive(full_path, true)) continue; - log.debug("unknown filetype: expected static archive: '{s}'", .{file_name}); - } - } - - fn parseLibs( - self: *Zld, - lib_names: []const []const u8, - lib_infos: []const link.SystemLib, - syslibroot: ?[]const u8, - dependent_libs: anytype, - ) !void { - for (lib_names, 0..) |lib, i| { - const lib_info = lib_infos[i]; - log.debug("parsing lib path '{s}'", .{lib}); - if (try self.parseDylib(lib, dependent_libs, .{ - .syslibroot = syslibroot, - .needed = lib_info.needed, - .weak = lib_info.weak, - })) continue; - if (try self.parseArchive(lib, false)) continue; - - log.debug("unknown filetype for a library: '{s}'", .{lib}); - } - } - - fn parseDependentLibs(self: *Zld, syslibroot: ?[]const u8, dependent_libs: anytype) !void { - // At this point, we can now parse dependents of dylibs preserving the inclusion order of: - // 1) anything on the linker line is parsed first - // 2) afterwards, we parse dependents of the included dylibs - // TODO this should not be performed if the user specifies `-flat_namespace` flag. - // See ld64 manpages. - var arena_alloc = std.heap.ArenaAllocator.init(self.gpa); - const arena = arena_alloc.allocator(); - defer arena_alloc.deinit(); - - while (dependent_libs.readItem()) |*dep_id| { - defer dep_id.id.deinit(self.gpa); - - if (self.dylibs_map.contains(dep_id.id.name)) continue; - - const weak = self.dylibs.items[dep_id.parent].weak; - const has_ext = blk: { - const basename = fs.path.basename(dep_id.id.name); - break :blk mem.lastIndexOfScalar(u8, basename, '.') != null; - }; - const extension = if (has_ext) fs.path.extension(dep_id.id.name) else ""; - const without_ext = if (has_ext) blk: { - const index = mem.lastIndexOfScalar(u8, dep_id.id.name, '.') orelse unreachable; - break :blk dep_id.id.name[0..index]; - } else dep_id.id.name; - - for (&[_][]const u8{ extension, ".tbd" }) |ext| { - const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ without_ext, ext }); - const full_path = if (syslibroot) |root| try fs.path.join(arena, &.{ root, with_ext }) else with_ext; - - log.debug("trying dependency at fully resolved path {s}", .{full_path}); - - const did_parse_successfully = try self.parseDylib(full_path, dependent_libs, .{ - .id = dep_id.id, - .syslibroot = syslibroot, - .dependent = true, - .weak = weak, - }); - if (did_parse_successfully) break; - } else { - log.debug("unable to resolve dependency {s}", .{dep_id.id.name}); - } - } - } - pub fn getOutputSection(self: *Zld, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); @@ -1009,7 +717,7 @@ pub const Zld = struct { if (self.archives.items.len == 0) return; const gpa = self.gpa; - const cpu_arch = self.options.target.cpu.arch; + var next_sym: usize = 0; loop: while (next_sym < resolver.unresolved.count()) { const global = self.globals.items[resolver.unresolved.keys()[next_sym]]; @@ -1024,13 +732,7 @@ pub const Zld = struct { assert(offsets.items.len > 0); const object_id = @as(u16, @intCast(self.objects.items.len)); - const object = archive.parseObject(gpa, cpu_arch, offsets.items[0]) catch |e| switch (e) { - error.MismatchedCpuArchitecture => { - log.err("CPU architecture mismatch found in {s}", .{archive.name}); - return e; - }, - else => return e, - }; + const object = try archive.parseObject(gpa, offsets.items[0]); try self.objects.append(gpa, object); try self.resolveSymbolsInObject(object_id, resolver); @@ -3512,37 +3214,27 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try zld.strtab.buffer.append(gpa, 0); // Positional arguments to the linker such as object files and static archives. - var positionals = std.ArrayList([]const u8).init(arena); + var positionals = std.ArrayList(Compilation.LinkObject).init(arena); try positionals.ensureUnusedCapacity(options.objects.len); - - var must_link_archives = std.StringArrayHashMap(void).init(arena); - try must_link_archives.ensureUnusedCapacity(options.objects.len); - - for (options.objects) |obj| { - if (must_link_archives.contains(obj.path)) continue; - if (obj.must_link) { - _ = must_link_archives.getOrPutAssumeCapacity(obj.path); - } else { - _ = positionals.appendAssumeCapacity(obj.path); - } - } + positionals.appendSliceAssumeCapacity(options.objects); for (comp.c_object_table.keys()) |key| { - try positionals.append(key.status.success.object_path); + try positionals.append(.{ .path = key.status.success.object_path }); } if (module_obj_path) |p| { - try positionals.append(p); + try positionals.append(.{ .path = p }); } if (comp.compiler_rt_lib) |lib| { - try positionals.append(lib.full_object_path); + try positionals.append(.{ .path = lib.full_object_path }); } // libc++ dep if (options.link_libcpp) { - try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); - try positionals.append(comp.libcxx_static_lib.?.full_object_path); + try positionals.ensureUnusedCapacity(2); + positionals.appendAssumeCapacity(.{ .path = comp.libcxxabi_static_lib.?.full_object_path }); + positionals.appendAssumeCapacity(.{ .path = comp.libcxx_static_lib.?.full_object_path }); } var libs = std.StringArrayHashMap(link.SystemLib).init(arena); @@ -3621,6 +3313,9 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } for (options.objects) |obj| { + if (obj.must_link) { + try argv.append("-force_load"); + } try argv.append(obj.path); } @@ -3682,10 +3377,6 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try argv.append("dynamic_lookup"); } - for (must_link_archives.keys()) |lib| { - try argv.append(try std.fmt.allocPrint(arena, "-force_load {s}", .{lib})); - } - Compilation.dump_argv(argv.items); } @@ -3694,10 +3385,49 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr parent: u16, }, .Dynamic).init(arena); - try zld.parseInputFiles(positionals.items, options.sysroot, &dependent_libs); - try zld.parseAndForceLoadStaticArchives(must_link_archives.keys()); - try zld.parseLibs(libs.keys(), libs.values(), options.sysroot, &dependent_libs); - try zld.parseDependentLibs(options.sysroot, &dependent_libs); + for (positionals.items) |obj| { + const in_file = try std.fs.cwd().openFile(obj.path, .{}); + defer in_file.close(); + + MachO.parsePositional( + &zld, + gpa, + in_file, + obj.path, + obj.must_link, + &dependent_libs, + options, + ) catch |err| { + // TODO convert to error + log.err("{s}: parsing positional failed with err {s}", .{ obj.path, @errorName(err) }); + continue; + }; + } + + for (libs.keys(), libs.values()) |path, lib| { + const in_file = try std.fs.cwd().openFile(path, .{}); + defer in_file.close(); + + MachO.parseLibrary( + &zld, + gpa, + in_file, + path, + lib, + false, + &dependent_libs, + options, + ) catch |err| { + // TODO convert to error + log.err("{s}: parsing library failed with err {s}", .{ path, @errorName(err) }); + continue; + }; + } + + MachO.parseDependentLibs(&zld, gpa, &dependent_libs, options) catch |err| { + // TODO convert to error + log.err("parsing dependent libraries failed with err {s}", .{@errorName(err)}); + }; var resolver = SymbolResolver{ .arena = arena, -- cgit v1.2.3