diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2024-05-13 08:56:15 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2024-05-13 08:56:15 +0200 |
| commit | 5d619da2de1e6f906082640b75322553f8fef7c0 (patch) | |
| tree | 7eb11d6f3a4b3d93320ef577248c118ac1488273 /lib/std/Build/Step/CheckObject.zig | |
| parent | 6f117dbca41e4dfede925d046b572df40849a9c4 (diff) | |
| download | zig-5d619da2de1e6f906082640b75322553f8fef7c0.tar.gz zig-5d619da2de1e6f906082640b75322553f8fef7c0.zip | |
Step.CheckObject: put MachO object parsing logic in ObjectContext wrapper
Diffstat (limited to 'lib/std/Build/Step/CheckObject.zig')
| -rw-r--r-- | lib/std/Build/Step/CheckObject.zig | 1709 |
1 files changed, 850 insertions, 859 deletions
diff --git a/lib/std/Build/Step/CheckObject.zig b/lib/std/Build/Step/CheckObject.zig index 0a15e22348..6997216b41 100644 --- a/lib/std/Build/Step/CheckObject.zig +++ b/lib/std/Build/Step/CheckObject.zig @@ -695,7 +695,6 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { } const MachODumper = struct { - const LoadCommandIterator = macho.LoadCommandIterator; const dyld_rebase_label = "dyld rebase data"; const dyld_bind_label = "dyld bind data"; const dyld_weak_bind_label = "dyld weak bind data"; @@ -704,80 +703,52 @@ const MachODumper = struct { const symtab_label = "symbol table"; const indirect_symtab_label = "indirect symbol table"; - const Symtab = struct { - symbols: []align(1) const macho.nlist_64 = &[0]macho.nlist_64{}, - strings: []const u8 = &[0]u8{}, - indirect_symbols: []align(1) const u32 = &[0]u32{}, - - fn getString(symtab: Symtab, off: u32) []const u8 { - assert(off < symtab.strings.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(symtab.strings.ptr + off)), 0); - } - }; - fn parseAndDump(step: *Step, check: Check, bytes: []const u8) ![]const u8 { - const gpa = step.owner.allocator; - var stream = std.io.fixedBufferStream(bytes); - const reader = stream.reader(); - - const hdr = try reader.readStruct(macho.mach_header_64); - if (hdr.magic != macho.MH_MAGIC_64) { - return error.InvalidMagicNumber; - } - - var output = std.ArrayList(u8).init(gpa); - const writer = output.writer(); + // TODO: handle archives and fat files + return parseAndDumpObject(step, check, bytes); + } - var symtab: Symtab = .{}; - var segments = std.ArrayList(macho.segment_command_64).init(gpa); - defer segments.deinit(); - var sections = std.ArrayList(macho.section_64).init(gpa); - defer sections.deinit(); - var imports = std.ArrayList([]const u8).init(gpa); - defer imports.deinit(); - var text_seg: ?u8 = null; - var dyld_info_lc: ?macho.dyld_info_command = null; - - { - var it: LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = bytes[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; + const ObjectContext = struct { + gpa: Allocator, + data: []const u8, + header: macho.mach_header_64, + segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, + sections: std.ArrayListUnmanaged(macho.section_64) = .{}, + symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, + strtab: std.ArrayListUnmanaged(u8) = .{}, + indsymtab: std.ArrayListUnmanaged(u32) = .{}, + imports: std.ArrayListUnmanaged([]const u8) = .{}, + + fn parse(ctx: *ObjectContext) !void { + var it = ctx.getLoadCommandIterator(); var i: usize = 0; while (it.next()) |cmd| { switch (cmd.cmd()) { .SEGMENT_64 => { const seg = cmd.cast(macho.segment_command_64).?; - try sections.ensureUnusedCapacity(seg.nsects); + try ctx.segments.append(ctx.gpa, seg); + try ctx.sections.ensureUnusedCapacity(ctx.gpa, seg.nsects); for (cmd.getSections()) |sect| { - sections.appendAssumeCapacity(sect); - } - const seg_id: u8 = @intCast(segments.items.len); - try segments.append(seg); - if (mem.eql(u8, seg.segName(), "__TEXT")) { - text_seg = seg_id; + ctx.sections.appendAssumeCapacity(sect); } }, .SYMTAB => { const lc = cmd.cast(macho.symtab_command).?; - const symbols = @as([*]align(1) const macho.nlist_64, @ptrCast(bytes.ptr + lc.symoff))[0..lc.nsyms]; - const strings = bytes[lc.stroff..][0..lc.strsize]; - symtab.symbols = symbols; - symtab.strings = strings; + const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(ctx.data.ptr + lc.symoff))[0..lc.nsyms]; + const strtab = ctx.data[lc.stroff..][0..lc.strsize]; + try ctx.symtab.appendUnalignedSlice(ctx.gpa, symtab); + try ctx.strtab.appendSlice(ctx.gpa, strtab); }, .DYSYMTAB => { const lc = cmd.cast(macho.dysymtab_command).?; - const indexes = @as([*]align(1) const u32, @ptrCast(bytes.ptr + lc.indirectsymoff))[0..lc.nindirectsyms]; - symtab.indirect_symbols = indexes; + const indexes = @as([*]align(1) const u32, @ptrCast(ctx.data.ptr + lc.indirectsymoff))[0..lc.nindirectsyms]; + try ctx.indsymtab.appendUnalignedSlice(ctx.gpa, indexes); }, .LOAD_DYLIB, .LOAD_WEAK_DYLIB, .REEXPORT_DYLIB, => { - try imports.append(cmd.getDylibPathName()); - }, - .DYLD_INFO_ONLY => { - dyld_info_lc = cmd.cast(macho.dyld_info_command).?; + try ctx.imports.append(ctx.gpa, cmd.getDylibPathName()); }, else => {}, } @@ -786,899 +757,919 @@ const MachODumper = struct { } } - switch (check.kind) { - .headers => { - try dumpHeader(hdr, writer); - - var it: LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = bytes[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - var i: usize = 0; - while (it.next()) |cmd| { - try dumpLoadCommand(cmd, i, writer); - try writer.writeByte('\n'); - - i += 1; - } - }, - - .symtab => if (symtab.symbols.len > 0) { - try dumpSymtab(sections.items, imports.items, symtab, writer); - } else return step.fail("no symbol table found", .{}), - - .indirect_symtab => if (symtab.symbols.len > 0 and symtab.indirect_symbols.len > 0) { - try dumpIndirectSymtab(gpa, sections.items, symtab, writer); - } else return step.fail("no indirect symbol table found", .{}), - - .dyld_rebase, - .dyld_bind, - .dyld_weak_bind, - .dyld_lazy_bind, - => { - if (dyld_info_lc == null) return step.fail("no dyld info found", .{}); - const lc = dyld_info_lc.?; - - switch (check.kind) { - .dyld_rebase => if (lc.rebase_size > 0) { - const data = bytes[lc.rebase_off..][0..lc.rebase_size]; - try writer.writeAll(dyld_rebase_label ++ "\n"); - try dumpRebaseInfo(gpa, data, segments.items, writer); - } else return step.fail("no rebase data found", .{}), - - .dyld_bind => if (lc.bind_size > 0) { - const data = bytes[lc.bind_off..][0..lc.bind_size]; - try writer.writeAll(dyld_bind_label ++ "\n"); - try dumpBindInfo(gpa, data, segments.items, imports.items, writer); - } else return step.fail("no bind data found", .{}), - - .dyld_weak_bind => if (lc.weak_bind_size > 0) { - const data = bytes[lc.weak_bind_off..][0..lc.weak_bind_size]; - try writer.writeAll(dyld_weak_bind_label ++ "\n"); - try dumpBindInfo(gpa, data, segments.items, imports.items, writer); - } else return step.fail("no weak bind data found", .{}), + fn getString(ctx: ObjectContext, off: u32) [:0]const u8 { + assert(off < ctx.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + off)), 0); + } - .dyld_lazy_bind => if (lc.lazy_bind_size > 0) { - const data = bytes[lc.lazy_bind_off..][0..lc.lazy_bind_size]; - try writer.writeAll(dyld_lazy_bind_label ++ "\n"); - try dumpBindInfo(gpa, data, segments.items, imports.items, writer); - } else return step.fail("no lazy bind data found", .{}), + fn getLoadCommandIterator(ctx: ObjectContext) macho.LoadCommandIterator { + const data = ctx.data[@sizeOf(macho.mach_header_64)..][0..ctx.header.sizeofcmds]; + return .{ .ncmds = ctx.header.ncmds, .buffer = data }; + } - else => unreachable, - } - }, + fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) ?macho.LoadCommandIterator.LoadCommand { + var it = ctx.getLoadCommandIterator(); + while (it.next()) |lc| if (lc.cmd() == cmd) { + return lc; + }; + return null; + } - .exports => blk: { - if (dyld_info_lc) |lc| { - if (lc.export_size > 0) { - const data = bytes[lc.export_off..][0..lc.export_size]; - try writer.writeAll(exports_label ++ "\n"); - try dumpExportsTrie(gpa, data, segments.items[text_seg.?], writer); - break :blk; - } - } - return step.fail("no exports data found", .{}); - }, + fn getSegmentByName(ctx: ObjectContext, name: []const u8) ?macho.segment_command_64 { + for (ctx.segments.items) |seg| { + if (mem.eql(u8, seg.segName(), name)) return seg; + } + return null; + } - else => return step.fail("invalid check kind for MachO file format: {s}", .{@tagName(check.kind)}), + fn getSectionByName(ctx: ObjectContext, segname: []const u8, sectname: []const u8) ?macho.section_64 { + for (ctx.sections.items) |sect| { + if (mem.eql(u8, sect.segName(), segname) and mem.eql(u8, sect.sectName(), sectname)) return sect; + } + return null; } - return output.toOwnedSlice(); - } + fn dumpHeader(hdr: macho.mach_header_64, writer: anytype) !void { + const cputype = switch (hdr.cputype) { + macho.CPU_TYPE_ARM64 => "ARM64", + macho.CPU_TYPE_X86_64 => "X86_64", + else => "Unknown", + }; + const filetype = switch (hdr.filetype) { + macho.MH_OBJECT => "MH_OBJECT", + macho.MH_EXECUTE => "MH_EXECUTE", + macho.MH_FVMLIB => "MH_FVMLIB", + macho.MH_CORE => "MH_CORE", + macho.MH_PRELOAD => "MH_PRELOAD", + macho.MH_DYLIB => "MH_DYLIB", + macho.MH_DYLINKER => "MH_DYLINKER", + macho.MH_BUNDLE => "MH_BUNDLE", + macho.MH_DYLIB_STUB => "MH_DYLIB_STUB", + macho.MH_DSYM => "MH_DSYM", + macho.MH_KEXT_BUNDLE => "MH_KEXT_BUNDLE", + else => "Unknown", + }; - fn dumpHeader(hdr: macho.mach_header_64, writer: anytype) !void { - const cputype = switch (hdr.cputype) { - macho.CPU_TYPE_ARM64 => "ARM64", - macho.CPU_TYPE_X86_64 => "X86_64", - else => "Unknown", - }; - const filetype = switch (hdr.filetype) { - macho.MH_OBJECT => "MH_OBJECT", - macho.MH_EXECUTE => "MH_EXECUTE", - macho.MH_FVMLIB => "MH_FVMLIB", - macho.MH_CORE => "MH_CORE", - macho.MH_PRELOAD => "MH_PRELOAD", - macho.MH_DYLIB => "MH_DYLIB", - macho.MH_DYLINKER => "MH_DYLINKER", - macho.MH_BUNDLE => "MH_BUNDLE", - macho.MH_DYLIB_STUB => "MH_DYLIB_STUB", - macho.MH_DSYM => "MH_DSYM", - macho.MH_KEXT_BUNDLE => "MH_KEXT_BUNDLE", - else => "Unknown", - }; + try writer.print( + \\header + \\cputype {s} + \\filetype {s} + \\ncmds {d} + \\sizeofcmds {x} + \\flags + , .{ + cputype, + filetype, + hdr.ncmds, + hdr.sizeofcmds, + }); + + if (hdr.flags > 0) { + if (hdr.flags & macho.MH_NOUNDEFS != 0) try writer.writeAll(" NOUNDEFS"); + if (hdr.flags & macho.MH_INCRLINK != 0) try writer.writeAll(" INCRLINK"); + if (hdr.flags & macho.MH_DYLDLINK != 0) try writer.writeAll(" DYLDLINK"); + if (hdr.flags & macho.MH_BINDATLOAD != 0) try writer.writeAll(" BINDATLOAD"); + if (hdr.flags & macho.MH_PREBOUND != 0) try writer.writeAll(" PREBOUND"); + if (hdr.flags & macho.MH_SPLIT_SEGS != 0) try writer.writeAll(" SPLIT_SEGS"); + if (hdr.flags & macho.MH_LAZY_INIT != 0) try writer.writeAll(" LAZY_INIT"); + if (hdr.flags & macho.MH_TWOLEVEL != 0) try writer.writeAll(" TWOLEVEL"); + if (hdr.flags & macho.MH_FORCE_FLAT != 0) try writer.writeAll(" FORCE_FLAT"); + if (hdr.flags & macho.MH_NOMULTIDEFS != 0) try writer.writeAll(" NOMULTIDEFS"); + if (hdr.flags & macho.MH_NOFIXPREBINDING != 0) try writer.writeAll(" NOFIXPREBINDING"); + if (hdr.flags & macho.MH_PREBINDABLE != 0) try writer.writeAll(" PREBINDABLE"); + if (hdr.flags & macho.MH_ALLMODSBOUND != 0) try writer.writeAll(" ALLMODSBOUND"); + if (hdr.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) try writer.writeAll(" SUBSECTIONS_VIA_SYMBOLS"); + if (hdr.flags & macho.MH_CANONICAL != 0) try writer.writeAll(" CANONICAL"); + if (hdr.flags & macho.MH_WEAK_DEFINES != 0) try writer.writeAll(" WEAK_DEFINES"); + if (hdr.flags & macho.MH_BINDS_TO_WEAK != 0) try writer.writeAll(" BINDS_TO_WEAK"); + if (hdr.flags & macho.MH_ALLOW_STACK_EXECUTION != 0) try writer.writeAll(" ALLOW_STACK_EXECUTION"); + if (hdr.flags & macho.MH_ROOT_SAFE != 0) try writer.writeAll(" ROOT_SAFE"); + if (hdr.flags & macho.MH_SETUID_SAFE != 0) try writer.writeAll(" SETUID_SAFE"); + if (hdr.flags & macho.MH_NO_REEXPORTED_DYLIBS != 0) try writer.writeAll(" NO_REEXPORTED_DYLIBS"); + if (hdr.flags & macho.MH_PIE != 0) try writer.writeAll(" PIE"); + if (hdr.flags & macho.MH_DEAD_STRIPPABLE_DYLIB != 0) try writer.writeAll(" DEAD_STRIPPABLE_DYLIB"); + if (hdr.flags & macho.MH_HAS_TLV_DESCRIPTORS != 0) try writer.writeAll(" HAS_TLV_DESCRIPTORS"); + if (hdr.flags & macho.MH_NO_HEAP_EXECUTION != 0) try writer.writeAll(" NO_HEAP_EXECUTION"); + if (hdr.flags & macho.MH_APP_EXTENSION_SAFE != 0) try writer.writeAll(" APP_EXTENSION_SAFE"); + if (hdr.flags & macho.MH_NLIST_OUTOFSYNC_WITH_DYLDINFO != 0) try writer.writeAll(" NLIST_OUTOFSYNC_WITH_DYLDINFO"); + } - try writer.print( - \\header - \\cputype {s} - \\filetype {s} - \\ncmds {d} - \\sizeofcmds {x} - \\flags - , .{ - cputype, - filetype, - hdr.ncmds, - hdr.sizeofcmds, - }); - - if (hdr.flags > 0) { - if (hdr.flags & macho.MH_NOUNDEFS != 0) try writer.writeAll(" NOUNDEFS"); - if (hdr.flags & macho.MH_INCRLINK != 0) try writer.writeAll(" INCRLINK"); - if (hdr.flags & macho.MH_DYLDLINK != 0) try writer.writeAll(" DYLDLINK"); - if (hdr.flags & macho.MH_BINDATLOAD != 0) try writer.writeAll(" BINDATLOAD"); - if (hdr.flags & macho.MH_PREBOUND != 0) try writer.writeAll(" PREBOUND"); - if (hdr.flags & macho.MH_SPLIT_SEGS != 0) try writer.writeAll(" SPLIT_SEGS"); - if (hdr.flags & macho.MH_LAZY_INIT != 0) try writer.writeAll(" LAZY_INIT"); - if (hdr.flags & macho.MH_TWOLEVEL != 0) try writer.writeAll(" TWOLEVEL"); - if (hdr.flags & macho.MH_FORCE_FLAT != 0) try writer.writeAll(" FORCE_FLAT"); - if (hdr.flags & macho.MH_NOMULTIDEFS != 0) try writer.writeAll(" NOMULTIDEFS"); - if (hdr.flags & macho.MH_NOFIXPREBINDING != 0) try writer.writeAll(" NOFIXPREBINDING"); - if (hdr.flags & macho.MH_PREBINDABLE != 0) try writer.writeAll(" PREBINDABLE"); - if (hdr.flags & macho.MH_ALLMODSBOUND != 0) try writer.writeAll(" ALLMODSBOUND"); - if (hdr.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) try writer.writeAll(" SUBSECTIONS_VIA_SYMBOLS"); - if (hdr.flags & macho.MH_CANONICAL != 0) try writer.writeAll(" CANONICAL"); - if (hdr.flags & macho.MH_WEAK_DEFINES != 0) try writer.writeAll(" WEAK_DEFINES"); - if (hdr.flags & macho.MH_BINDS_TO_WEAK != 0) try writer.writeAll(" BINDS_TO_WEAK"); - if (hdr.flags & macho.MH_ALLOW_STACK_EXECUTION != 0) try writer.writeAll(" ALLOW_STACK_EXECUTION"); - if (hdr.flags & macho.MH_ROOT_SAFE != 0) try writer.writeAll(" ROOT_SAFE"); - if (hdr.flags & macho.MH_SETUID_SAFE != 0) try writer.writeAll(" SETUID_SAFE"); - if (hdr.flags & macho.MH_NO_REEXPORTED_DYLIBS != 0) try writer.writeAll(" NO_REEXPORTED_DYLIBS"); - if (hdr.flags & macho.MH_PIE != 0) try writer.writeAll(" PIE"); - if (hdr.flags & macho.MH_DEAD_STRIPPABLE_DYLIB != 0) try writer.writeAll(" DEAD_STRIPPABLE_DYLIB"); - if (hdr.flags & macho.MH_HAS_TLV_DESCRIPTORS != 0) try writer.writeAll(" HAS_TLV_DESCRIPTORS"); - if (hdr.flags & macho.MH_NO_HEAP_EXECUTION != 0) try writer.writeAll(" NO_HEAP_EXECUTION"); - if (hdr.flags & macho.MH_APP_EXTENSION_SAFE != 0) try writer.writeAll(" APP_EXTENSION_SAFE"); - if (hdr.flags & macho.MH_NLIST_OUTOFSYNC_WITH_DYLDINFO != 0) try writer.writeAll(" NLIST_OUTOFSYNC_WITH_DYLDINFO"); + try writer.writeByte('\n'); } - try writer.writeByte('\n'); - } + fn dumpLoadCommand(lc: macho.LoadCommandIterator.LoadCommand, index: usize, writer: anytype) !void { + // print header first + try writer.print( + \\LC {d} + \\cmd {s} + \\cmdsize {d} + , .{ index, @tagName(lc.cmd()), lc.cmdsize() }); + + switch (lc.cmd()) { + .SEGMENT_64 => { + const seg = lc.cast(macho.segment_command_64).?; + try writer.writeByte('\n'); + try writer.print( + \\segname {s} + \\vmaddr {x} + \\vmsize {x} + \\fileoff {x} + \\filesz {x} + , .{ + seg.segName(), + seg.vmaddr, + seg.vmsize, + seg.fileoff, + seg.filesize, + }); - fn dumpLoadCommand(lc: macho.LoadCommandIterator.LoadCommand, index: usize, writer: anytype) !void { - // print header first - try writer.print( - \\LC {d} - \\cmd {s} - \\cmdsize {d} - , .{ index, @tagName(lc.cmd()), lc.cmdsize() }); - - switch (lc.cmd()) { - .SEGMENT_64 => { - const seg = lc.cast(macho.segment_command_64).?; - try writer.writeByte('\n'); - try writer.print( - \\segname {s} - \\vmaddr {x} - \\vmsize {x} - \\fileoff {x} - \\filesz {x} - , .{ - seg.segName(), - seg.vmaddr, - seg.vmsize, - seg.fileoff, - seg.filesize, - }); + for (lc.getSections()) |sect| { + try writer.writeByte('\n'); + try writer.print( + \\sectname {s} + \\addr {x} + \\size {x} + \\offset {x} + \\align {x} + , .{ + sect.sectName(), + sect.addr, + sect.size, + sect.offset, + sect.@"align", + }); + } + }, - for (lc.getSections()) |sect| { + .ID_DYLIB, + .LOAD_DYLIB, + .LOAD_WEAK_DYLIB, + .REEXPORT_DYLIB, + => { + const dylib = lc.cast(macho.dylib_command).?; try writer.writeByte('\n'); try writer.print( - \\sectname {s} - \\addr {x} - \\size {x} - \\offset {x} - \\align {x} + \\name {s} + \\timestamp {d} + \\current version {x} + \\compatibility version {x} , .{ - sect.sectName(), - sect.addr, - sect.size, - sect.offset, - sect.@"align", + lc.getDylibPathName(), + dylib.dylib.timestamp, + dylib.dylib.current_version, + dylib.dylib.compatibility_version, }); - } - }, - - .ID_DYLIB, - .LOAD_DYLIB, - .LOAD_WEAK_DYLIB, - .REEXPORT_DYLIB, - => { - const dylib = lc.cast(macho.dylib_command).?; - try writer.writeByte('\n'); - try writer.print( - \\name {s} - \\timestamp {d} - \\current version {x} - \\compatibility version {x} - , .{ - lc.getDylibPathName(), - dylib.dylib.timestamp, - dylib.dylib.current_version, - dylib.dylib.compatibility_version, - }); - }, + }, - .MAIN => { - const main = lc.cast(macho.entry_point_command).?; - try writer.writeByte('\n'); - try writer.print( - \\entryoff {x} - \\stacksize {x} - , .{ main.entryoff, main.stacksize }); - }, + .MAIN => { + const main = lc.cast(macho.entry_point_command).?; + try writer.writeByte('\n'); + try writer.print( + \\entryoff {x} + \\stacksize {x} + , .{ main.entryoff, main.stacksize }); + }, - .RPATH => { - try writer.writeByte('\n'); - try writer.print( - \\path {s} - , .{ - lc.getRpathPathName(), - }); - }, + .RPATH => { + try writer.writeByte('\n'); + try writer.print( + \\path {s} + , .{ + lc.getRpathPathName(), + }); + }, - .UUID => { - const uuid = lc.cast(macho.uuid_command).?; - try writer.writeByte('\n'); - try writer.print("uuid {x}", .{std.fmt.fmtSliceHexLower(&uuid.uuid)}); - }, + .UUID => { + const uuid = lc.cast(macho.uuid_command).?; + try writer.writeByte('\n'); + try writer.print("uuid {x}", .{std.fmt.fmtSliceHexLower(&uuid.uuid)}); + }, - .DATA_IN_CODE, - .FUNCTION_STARTS, - .CODE_SIGNATURE, - => { - const llc = lc.cast(macho.linkedit_data_command).?; - try writer.writeByte('\n'); - try writer.print( - \\dataoff {x} - \\datasize {x} - , .{ llc.dataoff, llc.datasize }); - }, + .DATA_IN_CODE, + .FUNCTION_STARTS, + .CODE_SIGNATURE, + => { + const llc = lc.cast(macho.linkedit_data_command).?; + try writer.writeByte('\n'); + try writer.print( + \\dataoff {x} + \\datasize {x} + , .{ llc.dataoff, llc.datasize }); + }, - .DYLD_INFO_ONLY => { - const dlc = lc.cast(macho.dyld_info_command).?; - try writer.writeByte('\n'); - try writer.print( - \\rebaseoff {x} - \\rebasesize {x} - \\bindoff {x} - \\bindsize {x} - \\weakbindoff {x} - \\weakbindsize {x} - \\lazybindoff {x} - \\lazybindsize {x} - \\exportoff {x} - \\exportsize {x} - , .{ - dlc.rebase_off, - dlc.rebase_size, - dlc.bind_off, - dlc.bind_size, - dlc.weak_bind_off, - dlc.weak_bind_size, - dlc.lazy_bind_off, - dlc.lazy_bind_size, - dlc.export_off, - dlc.export_size, - }); - }, + .DYLD_INFO_ONLY => { + const dlc = lc.cast(macho.dyld_info_command).?; + try writer.writeByte('\n'); + try writer.print( + \\rebaseoff {x} + \\rebasesize {x} + \\bindoff {x} + \\bindsize {x} + \\weakbindoff {x} + \\weakbindsize {x} + \\lazybindoff {x} + \\lazybindsize {x} + \\exportoff {x} + \\exportsize {x} + , .{ + dlc.rebase_off, + dlc.rebase_size, + dlc.bind_off, + dlc.bind_size, + dlc.weak_bind_off, + dlc.weak_bind_size, + dlc.lazy_bind_off, + dlc.lazy_bind_size, + dlc.export_off, + dlc.export_size, + }); + }, - .SYMTAB => { - const slc = lc.cast(macho.symtab_command).?; - try writer.writeByte('\n'); - try writer.print( - \\symoff {x} - \\nsyms {x} - \\stroff {x} - \\strsize {x} - , .{ - slc.symoff, - slc.nsyms, - slc.stroff, - slc.strsize, - }); - }, + .SYMTAB => { + const slc = lc.cast(macho.symtab_command).?; + try writer.writeByte('\n'); + try writer.print( + \\symoff {x} + \\nsyms {x} + \\stroff {x} + \\strsize {x} + , .{ + slc.symoff, + slc.nsyms, + slc.stroff, + slc.strsize, + }); + }, - .DYSYMTAB => { - const dlc = lc.cast(macho.dysymtab_command).?; - try writer.writeByte('\n'); - try writer.print( - \\ilocalsym {x} - \\nlocalsym {x} - \\iextdefsym {x} - \\nextdefsym {x} - \\iundefsym {x} - \\nundefsym {x} - \\indirectsymoff {x} - \\nindirectsyms {x} - , .{ - dlc.ilocalsym, - dlc.nlocalsym, - dlc.iextdefsym, - dlc.nextdefsym, - dlc.iundefsym, - dlc.nundefsym, - dlc.indirectsymoff, - dlc.nindirectsyms, - }); - }, + .DYSYMTAB => { + const dlc = lc.cast(macho.dysymtab_command).?; + try writer.writeByte('\n'); + try writer.print( + \\ilocalsym {x} + \\nlocalsym {x} + \\iextdefsym {x} + \\nextdefsym {x} + \\iundefsym {x} + \\nundefsym {x} + \\indirectsymoff {x} + \\nindirectsyms {x} + , .{ + dlc.ilocalsym, + dlc.nlocalsym, + dlc.iextdefsym, + dlc.nextdefsym, + dlc.iundefsym, + dlc.nundefsym, + dlc.indirectsymoff, + dlc.nindirectsyms, + }); + }, - .BUILD_VERSION => { - const blc = lc.cast(macho.build_version_command).?; - try writer.writeByte('\n'); - try writer.print( - \\platform {s} - \\minos {d}.{d}.{d} - \\sdk {d}.{d}.{d} - \\ntools {d} - , .{ - @tagName(blc.platform), - blc.minos >> 16, - @as(u8, @truncate(blc.minos >> 8)), - @as(u8, @truncate(blc.minos)), - blc.sdk >> 16, - @as(u8, @truncate(blc.sdk >> 8)), - @as(u8, @truncate(blc.sdk)), - blc.ntools, - }); - for (lc.getBuildVersionTools()) |tool| { + .BUILD_VERSION => { + const blc = lc.cast(macho.build_version_command).?; try writer.writeByte('\n'); - switch (tool.tool) { - .CLANG, .SWIFT, .LD, .LLD, .ZIG => try writer.print("tool {s}\n", .{@tagName(tool.tool)}), - else => |x| try writer.print("tool {d}\n", .{@intFromEnum(x)}), + try writer.print( + \\platform {s} + \\minos {d}.{d}.{d} + \\sdk {d}.{d}.{d} + \\ntools {d} + , .{ + @tagName(blc.platform), + blc.minos >> 16, + @as(u8, @truncate(blc.minos >> 8)), + @as(u8, @truncate(blc.minos)), + blc.sdk >> 16, + @as(u8, @truncate(blc.sdk >> 8)), + @as(u8, @truncate(blc.sdk)), + blc.ntools, + }); + for (lc.getBuildVersionTools()) |tool| { + try writer.writeByte('\n'); + switch (tool.tool) { + .CLANG, .SWIFT, .LD, .LLD, .ZIG => try writer.print("tool {s}\n", .{@tagName(tool.tool)}), + else => |x| try writer.print("tool {d}\n", .{@intFromEnum(x)}), + } + try writer.print( + \\version {d}.{d}.{d} + , .{ + tool.version >> 16, + @as(u8, @truncate(tool.version >> 8)), + @as(u8, @truncate(tool.version)), + }); } + }, + + .VERSION_MIN_MACOSX, + .VERSION_MIN_IPHONEOS, + .VERSION_MIN_WATCHOS, + .VERSION_MIN_TVOS, + => { + const vlc = lc.cast(macho.version_min_command).?; + try writer.writeByte('\n'); try writer.print( \\version {d}.{d}.{d} + \\sdk {d}.{d}.{d} , .{ - tool.version >> 16, - @as(u8, @truncate(tool.version >> 8)), - @as(u8, @truncate(tool.version)), + vlc.version >> 16, + @as(u8, @truncate(vlc.version >> 8)), + @as(u8, @truncate(vlc.version)), + vlc.sdk >> 16, + @as(u8, @truncate(vlc.sdk >> 8)), + @as(u8, @truncate(vlc.sdk)), }); - } - }, - - .VERSION_MIN_MACOSX, - .VERSION_MIN_IPHONEOS, - .VERSION_MIN_WATCHOS, - .VERSION_MIN_TVOS, - => { - const vlc = lc.cast(macho.version_min_command).?; - try writer.writeByte('\n'); - try writer.print( - \\version {d}.{d}.{d} - \\sdk {d}.{d}.{d} - , .{ - vlc.version >> 16, - @as(u8, @truncate(vlc.version >> 8)), - @as(u8, @truncate(vlc.version)), - vlc.sdk >> 16, - @as(u8, @truncate(vlc.sdk >> 8)), - @as(u8, @truncate(vlc.sdk)), - }); - }, + }, - else => {}, + else => {}, + } } - } - fn dumpSymtab( - sections: []const macho.section_64, - imports: []const []const u8, - symtab: Symtab, - writer: anytype, - ) !void { - try writer.writeAll(symtab_label ++ "\n"); - - for (symtab.symbols) |sym| { - if (sym.stab()) continue; - const sym_name = symtab.getString(sym.n_strx); - if (sym.sect()) { - const sect = sections[sym.n_sect - 1]; - try writer.print("{x} ({s},{s})", .{ - sym.n_value, - sect.segName(), - sect.sectName(), - }); - if (sym.n_desc & macho.REFERENCED_DYNAMICALLY != 0) try writer.writeAll(" [referenced dynamically]"); - if (sym.weakDef()) try writer.writeAll(" weak"); - if (sym.weakRef()) try writer.writeAll(" weakref"); - if (sym.ext()) { - if (sym.pext()) try writer.writeAll(" private"); - try writer.writeAll(" external"); - } else if (sym.pext()) try writer.writeAll(" (was private external)"); - try writer.print(" {s}\n", .{sym_name}); - } else if (sym.tentative()) { - const alignment = (sym.n_desc >> 8) & 0x0F; - try writer.print(" 0x{x:0>16} (common) (alignment 2^{d})", .{ sym.n_value, alignment }); - if (sym.ext()) try writer.writeAll(" external"); - try writer.print(" {s}\n", .{sym_name}); - } else if (sym.undf()) { - const ordinal = @divFloor(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); - const import_name = blk: { - if (ordinal <= 0) { - if (ordinal == macho.BIND_SPECIAL_DYLIB_SELF) - break :blk "self import"; - if (ordinal == macho.BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE) - break :blk "main executable"; - if (ordinal == macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP) - break :blk "flat lookup"; - unreachable; - } - const full_path = imports[@as(u16, @bitCast(ordinal)) - 1]; - const basename = fs.path.basename(full_path); - assert(basename.len > 0); - const ext = mem.lastIndexOfScalar(u8, basename, '.') orelse basename.len; - break :blk basename[0..ext]; - }; - try writer.writeAll("(undefined)"); - if (sym.weakRef()) try writer.writeAll(" weakref"); - if (sym.ext()) try writer.writeAll(" external"); - try writer.print(" {s} (from {s})\n", .{ - sym_name, - import_name, - }); + fn dumpSymtab(ctx: ObjectContext, writer: anytype) !void { + try writer.writeAll(symtab_label ++ "\n"); + + for (ctx.symtab.items) |sym| { + if (sym.stab()) continue; + const sym_name = ctx.getString(sym.n_strx); + if (sym.sect()) { + const sect = ctx.sections.items[sym.n_sect - 1]; + try writer.print("{x} ({s},{s})", .{ + sym.n_value, + sect.segName(), + sect.sectName(), + }); + if (sym.n_desc & macho.REFERENCED_DYNAMICALLY != 0) try writer.writeAll(" [referenced dynamically]"); + if (sym.weakDef()) try writer.writeAll(" weak"); + if (sym.weakRef()) try writer.writeAll(" weakref"); + if (sym.ext()) { + if (sym.pext()) try writer.writeAll(" private"); + try writer.writeAll(" external"); + } else if (sym.pext()) try writer.writeAll(" (was private external)"); + try writer.print(" {s}\n", .{sym_name}); + } else if (sym.tentative()) { + const alignment = (sym.n_desc >> 8) & 0x0F; + try writer.print(" 0x{x:0>16} (common) (alignment 2^{d})", .{ sym.n_value, alignment }); + if (sym.ext()) try writer.writeAll(" external"); + try writer.print(" {s}\n", .{sym_name}); + } else if (sym.undf()) { + const ordinal = @divFloor(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); + const import_name = blk: { + if (ordinal <= 0) { + if (ordinal == macho.BIND_SPECIAL_DYLIB_SELF) + break :blk "self import"; + if (ordinal == macho.BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE) + break :blk "main executable"; + if (ordinal == macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP) + break :blk "flat lookup"; + unreachable; + } + const full_path = ctx.imports.items[@as(u16, @bitCast(ordinal)) - 1]; + const basename = fs.path.basename(full_path); + assert(basename.len > 0); + const ext = mem.lastIndexOfScalar(u8, basename, '.') orelse basename.len; + break :blk basename[0..ext]; + }; + try writer.writeAll("(undefined)"); + if (sym.weakRef()) try writer.writeAll(" weakref"); + if (sym.ext()) try writer.writeAll(" external"); + try writer.print(" {s} (from {s})\n", .{ + sym_name, + import_name, + }); + } } } - } - fn dumpIndirectSymtab( - gpa: Allocator, - sections: []const macho.section_64, - symtab: Symtab, - writer: anytype, - ) !void { - try writer.writeAll(indirect_symtab_label ++ "\n"); + fn dumpIndirectSymtab(ctx: ObjectContext, writer: anytype) !void { + try writer.writeAll(indirect_symtab_label ++ "\n"); - var sects = std.ArrayList(macho.section_64).init(gpa); - defer sects.deinit(); - try sects.ensureUnusedCapacity(3); + var sects_buffer: [3]macho.section_64 = undefined; + const sects = blk: { + var count: usize = 0; + if (ctx.getSectionByName("__TEXT", "__stubs")) |sect| { + sects_buffer[count] = sect; + count += 1; + } + if (ctx.getSectionByName("__DATA_CONST", "__got")) |sect| { + sects_buffer[count] = sect; + count += 1; + } + if (ctx.getSectionByName("__DATA", "__la_symbol_ptr")) |sect| { + sects_buffer[count] = sect; + count += 1; + } + break :blk sects_buffer[0..count]; + }; - for (sections) |sect| { - if (mem.eql(u8, sect.sectName(), "__stubs")) sects.appendAssumeCapacity(sect); - if (mem.eql(u8, sect.sectName(), "__got")) sects.appendAssumeCapacity(sect); - if (mem.eql(u8, sect.sectName(), "__la_symbol_ptr")) sects.appendAssumeCapacity(sect); - } + const sortFn = struct { + fn sortFn(c: void, lhs: macho.section_64, rhs: macho.section_64) bool { + _ = c; + return lhs.reserved1 < rhs.reserved1; + } + }.sortFn; + mem.sort(macho.section_64, sects, {}, sortFn); - const sortFn = struct { - fn sortFn(ctx: void, lhs: macho.section_64, rhs: macho.section_64) bool { - _ = ctx; - return lhs.reserved1 < rhs.reserved1; - } - }.sortFn; - mem.sort(macho.section_64, sects.items, {}, sortFn); - - var i: usize = 0; - while (i < sects.items.len) : (i += 1) { - const sect = sects.items[i]; - const start = sect.reserved1; - const end = if (i + 1 >= sects.items.len) symtab.indirect_symbols.len else sects.items[i + 1].reserved1; - const entry_size = blk: { - if (mem.eql(u8, sect.sectName(), "__stubs")) break :blk sect.reserved2; - break :blk @sizeOf(u64); - }; + var i: usize = 0; + while (i < sects.len) : (i += 1) { + const sect = sects[i]; + const start = sect.reserved1; + const end = if (i + 1 >= sects.len) ctx.indsymtab.items.len else sects[i + 1].reserved1; + const entry_size = blk: { + if (mem.eql(u8, sect.sectName(), "__stubs")) break :blk sect.reserved2; + break :blk @sizeOf(u64); + }; - try writer.print("{s},{s}\n", .{ sect.segName(), sect.sectName() }); - try writer.print("nentries {d}\n", .{end - start}); - for (symtab.indirect_symbols[start..end], 0..) |index, j| { - const sym = symtab.symbols[index]; - const addr = sect.addr + entry_size * j; - try writer.print("0x{x} {d} {s}\n", .{ addr, index, symtab.getString(sym.n_strx) }); + try writer.print("{s},{s}\n", .{ sect.segName(), sect.sectName() }); + try writer.print("nentries {d}\n", .{end - start}); + for (ctx.indsymtab.items[start..end], 0..) |index, j| { + const sym = ctx.symtab.items[index]; + const addr = sect.addr + entry_size * j; + try writer.print("0x{x} {d} {s}\n", .{ addr, index, ctx.getString(sym.n_strx) }); + } } } - } - fn dumpRebaseInfo( - gpa: Allocator, - data: []const u8, - segments: []const macho.segment_command_64, - writer: anytype, - ) !void { - var rebases = std.ArrayList(u64).init(gpa); - defer rebases.deinit(); - try parseRebaseInfo(data, segments, &rebases); - mem.sort(u64, rebases.items, {}, std.sort.asc(u64)); - for (rebases.items) |addr| { - try writer.print("0x{x}\n", .{addr}); + fn dumpRebaseInfo(ctx: ObjectContext, data: []const u8, writer: anytype) !void { + var rebases = std.ArrayList(u64).init(ctx.gpa); + defer rebases.deinit(); + try ctx.parseRebaseInfo(data, &rebases); + mem.sort(u64, rebases.items, {}, std.sort.asc(u64)); + for (rebases.items) |addr| { + try writer.print("0x{x}\n", .{addr}); + } } - } - fn parseRebaseInfo( - data: []const u8, - segments: []const macho.segment_command_64, - rebases: *std.ArrayList(u64), - ) !void { - var stream = std.io.fixedBufferStream(data); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); + fn parseRebaseInfo(ctx: ObjectContext, data: []const u8, rebases: *std.ArrayList(u64)) !void { + var stream = std.io.fixedBufferStream(data); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); - var seg_id: ?u8 = null; - var offset: u64 = 0; - while (true) { - const byte = reader.readByte() catch break; - const opc = byte & macho.REBASE_OPCODE_MASK; - const imm = byte & macho.REBASE_IMMEDIATE_MASK; - switch (opc) { - macho.REBASE_OPCODE_DONE => break, - macho.REBASE_OPCODE_SET_TYPE_IMM => {}, - macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - seg_id = imm; - offset = try std.leb.readULEB128(u64, reader); - }, - macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED => { - offset += imm * @sizeOf(u64); - }, - macho.REBASE_OPCODE_ADD_ADDR_ULEB => { - const addend = try std.leb.readULEB128(u64, reader); - offset += addend; - }, - macho.REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB => { - const addend = try std.leb.readULEB128(u64, reader); - const seg = segments[seg_id.?]; - const addr = seg.vmaddr + offset; - try rebases.append(addr); - offset += addend + @sizeOf(u64); - }, - macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES, - macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES, - macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, - => { - var ntimes: u64 = 1; - var skip: u64 = 0; - switch (opc) { - macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES => { - ntimes = imm; - }, - macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES => { - ntimes = try std.leb.readULEB128(u64, reader); - }, - macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB => { - ntimes = try std.leb.readULEB128(u64, reader); - skip = try std.leb.readULEB128(u64, reader); - }, - else => unreachable, - } - const seg = segments[seg_id.?]; - const base_addr = seg.vmaddr; - var count: usize = 0; - while (count < ntimes) : (count += 1) { - const addr = base_addr + offset; + var seg_id: ?u8 = null; + var offset: u64 = 0; + while (true) { + const byte = reader.readByte() catch break; + const opc = byte & macho.REBASE_OPCODE_MASK; + const imm = byte & macho.REBASE_IMMEDIATE_MASK; + switch (opc) { + macho.REBASE_OPCODE_DONE => break, + macho.REBASE_OPCODE_SET_TYPE_IMM => {}, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + seg_id = imm; + offset = try std.leb.readULEB128(u64, reader); + }, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED => { + offset += imm * @sizeOf(u64); + }, + macho.REBASE_OPCODE_ADD_ADDR_ULEB => { + const addend = try std.leb.readULEB128(u64, reader); + offset += addend; + }, + macho.REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB => { + const addend = try std.leb.readULEB128(u64, reader); + const seg = ctx.segments.items[seg_id.?]; + const addr = seg.vmaddr + offset; try rebases.append(addr); - offset += skip + @sizeOf(u64); - } - }, - else => break, + offset += addend + @sizeOf(u64); + }, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + => { + var ntimes: u64 = 1; + var skip: u64 = 0; + switch (opc) { + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES => { + ntimes = imm; + }, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES => { + ntimes = try std.leb.readULEB128(u64, reader); + }, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB => { + ntimes = try std.leb.readULEB128(u64, reader); + skip = try std.leb.readULEB128(u64, reader); + }, + else => unreachable, + } + const seg = ctx.segments.items[seg_id.?]; + const base_addr = seg.vmaddr; + var count: usize = 0; + while (count < ntimes) : (count += 1) { + const addr = base_addr + offset; + try rebases.append(addr); + offset += skip + @sizeOf(u64); + } + }, + else => break, + } } } - } - const Binding = struct { - address: u64, - addend: i64, - ordinal: u16, - tag: Tag, - name: []const u8, + const Binding = struct { + address: u64, + addend: i64, + ordinal: u16, + tag: Tag, + name: []const u8, - fn deinit(binding: *Binding, gpa: Allocator) void { - gpa.free(binding.name); - } + fn deinit(binding: *Binding, gpa: Allocator) void { + gpa.free(binding.name); + } - fn lessThan(ctx: void, lhs: Binding, rhs: Binding) bool { - _ = ctx; - return lhs.address < rhs.address; - } + fn lessThan(ctx: void, lhs: Binding, rhs: Binding) bool { + _ = ctx; + return lhs.address < rhs.address; + } - const Tag = enum { - ord, - self, - exe, - flat, + const Tag = enum { + ord, + self, + exe, + flat, + }; }; - }; - fn dumpBindInfo( - gpa: Allocator, - data: []const u8, - segments: []const macho.segment_command_64, - dylibs: []const []const u8, - writer: anytype, - ) !void { - var bindings = std.ArrayList(Binding).init(gpa); - defer { - for (bindings.items) |*b| { - b.deinit(gpa); + fn dumpBindInfo(ctx: ObjectContext, data: []const u8, writer: anytype) !void { + var bindings = std.ArrayList(Binding).init(ctx.gpa); + defer { + for (bindings.items) |*b| { + b.deinit(ctx.gpa); + } + bindings.deinit(); + } + try ctx.parseBindInfo(data, &bindings); + mem.sort(Binding, bindings.items, {}, Binding.lessThan); + for (bindings.items) |binding| { + try writer.print("0x{x} [addend: {d}]", .{ binding.address, binding.addend }); + try writer.writeAll(" ("); + switch (binding.tag) { + .self => try writer.writeAll("self"), + .exe => try writer.writeAll("main executable"), + .flat => try writer.writeAll("flat lookup"), + .ord => try writer.writeAll(std.fs.path.basename(ctx.imports.items[binding.ordinal - 1])), + } + try writer.print(") {s}\n", .{binding.name}); } - bindings.deinit(); } - try parseBindInfo(gpa, data, segments, &bindings); - mem.sort(Binding, bindings.items, {}, Binding.lessThan); - for (bindings.items) |binding| { - try writer.print("0x{x} [addend: {d}]", .{ binding.address, binding.addend }); - try writer.writeAll(" ("); - switch (binding.tag) { - .self => try writer.writeAll("self"), - .exe => try writer.writeAll("main executable"), - .flat => try writer.writeAll("flat lookup"), - .ord => try writer.writeAll(std.fs.path.basename(dylibs[binding.ordinal - 1])), + + fn parseBindInfo(ctx: ObjectContext, data: []const u8, bindings: *std.ArrayList(Binding)) !void { + var stream = std.io.fixedBufferStream(data); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + var seg_id: ?u8 = null; + var tag: Binding.Tag = .self; + var ordinal: u16 = 0; + var offset: u64 = 0; + var addend: i64 = 0; + + var name_buf = std.ArrayList(u8).init(ctx.gpa); + defer name_buf.deinit(); + + while (true) { + const byte = reader.readByte() catch break; + const opc = byte & macho.BIND_OPCODE_MASK; + const imm = byte & macho.BIND_IMMEDIATE_MASK; + switch (opc) { + macho.BIND_OPCODE_DONE, + macho.BIND_OPCODE_SET_TYPE_IMM, + => {}, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { + tag = .ord; + ordinal = imm; + }, + macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => { + switch (imm) { + 0 => tag = .self, + 0xf => tag = .exe, + 0xe => tag = .flat, + else => unreachable, + } + }, + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + seg_id = imm; + offset = try std.leb.readULEB128(u64, reader); + }, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { + name_buf.clearRetainingCapacity(); + try reader.readUntilDelimiterArrayList(&name_buf, 0, std.math.maxInt(u32)); + try name_buf.append(0); + }, + macho.BIND_OPCODE_SET_ADDEND_SLEB => { + addend = try std.leb.readILEB128(i64, reader); + }, + macho.BIND_OPCODE_ADD_ADDR_ULEB => { + const x = try std.leb.readULEB128(u64, reader); + offset = @intCast(@as(i64, @intCast(offset)) + @as(i64, @bitCast(x))); + }, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB, + macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED, + macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, + => { + var add_addr: u64 = 0; + var count: u64 = 1; + var skip: u64 = 0; + + switch (opc) { + macho.BIND_OPCODE_DO_BIND => {}, + macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => { + add_addr = try std.leb.readULEB128(u64, reader); + }, + macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => { + add_addr = imm * @sizeOf(u64); + }, + macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => { + count = try std.leb.readULEB128(u64, reader); + skip = try std.leb.readULEB128(u64, reader); + }, + else => unreachable, + } + + const seg = ctx.segments.items[seg_id.?]; + var i: u64 = 0; + while (i < count) : (i += 1) { + const addr: u64 = @intCast(@as(i64, @intCast(seg.vmaddr + offset))); + try bindings.append(.{ + .address = addr, + .addend = addend, + .tag = tag, + .ordinal = ordinal, + .name = try ctx.gpa.dupe(u8, name_buf.items), + }); + offset += skip + @sizeOf(u64) + add_addr; + } + }, + else => break, + } } - try writer.print(") {s}\n", .{binding.name}); } - } - fn parseBindInfo( - gpa: Allocator, - data: []const u8, - segments: []const macho.segment_command_64, - bindings: *std.ArrayList(Binding), - ) !void { - var stream = std.io.fixedBufferStream(data); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); + fn dumpExportsTrie(ctx: ObjectContext, data: []const u8, writer: anytype) !void { + const seg = ctx.getSegmentByName("__TEXT") orelse return; - var seg_id: ?u8 = null; - var tag: Binding.Tag = .self; - var ordinal: u16 = 0; - var offset: u64 = 0; - var addend: i64 = 0; + var arena = std.heap.ArenaAllocator.init(ctx.gpa); + defer arena.deinit(); - var name_buf = std.ArrayList(u8).init(gpa); - defer name_buf.deinit(); + var exports = std.ArrayList(Export).init(arena.allocator()); + var it = TrieIterator{ .data = data }; + try parseTrieNode(arena.allocator(), &it, "", &exports); - while (true) { - const byte = reader.readByte() catch break; - const opc = byte & macho.BIND_OPCODE_MASK; - const imm = byte & macho.BIND_IMMEDIATE_MASK; - switch (opc) { - macho.BIND_OPCODE_DONE, - macho.BIND_OPCODE_SET_TYPE_IMM, - => {}, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { - tag = .ord; - ordinal = imm; - }, - macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => { - switch (imm) { - 0 => tag = .self, - 0xf => tag = .exe, - 0xe => tag = .flat, - else => unreachable, - } - }, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - seg_id = imm; - offset = try std.leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - name_buf.clearRetainingCapacity(); - try reader.readUntilDelimiterArrayList(&name_buf, 0, std.math.maxInt(u32)); - try name_buf.append(0); - }, - macho.BIND_OPCODE_SET_ADDEND_SLEB => { - addend = try std.leb.readILEB128(i64, reader); - }, - macho.BIND_OPCODE_ADD_ADDR_ULEB => { - const x = try std.leb.readULEB128(u64, reader); - offset = @intCast(@as(i64, @intCast(offset)) + @as(i64, @bitCast(x))); - }, - macho.BIND_OPCODE_DO_BIND, - macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB, - macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED, - macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB, - => { - var add_addr: u64 = 0; - var count: u64 = 1; - var skip: u64 = 0; - - switch (opc) { - macho.BIND_OPCODE_DO_BIND => {}, - macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => { - add_addr = try std.leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => { - add_addr = imm * @sizeOf(u64); - }, - macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => { - count = try std.leb.readULEB128(u64, reader); - skip = try std.leb.readULEB128(u64, reader); - }, - else => unreachable, - } + mem.sort(Export, exports.items, {}, Export.lessThan); - const seg = segments[seg_id.?]; - var i: u64 = 0; - while (i < count) : (i += 1) { - const addr: u64 = @intCast(@as(i64, @intCast(seg.vmaddr + offset))); - try bindings.append(.{ - .address = addr, - .addend = addend, - .tag = tag, - .ordinal = ordinal, - .name = try gpa.dupe(u8, name_buf.items), - }); - offset += skip + @sizeOf(u64) + add_addr; - } - }, - else => break, + for (exports.items) |exp| { + switch (exp.tag) { + .@"export" => { + const info = exp.data.@"export"; + if (info.kind != .regular or info.weak) { + try writer.writeByte('['); + } + switch (info.kind) { + .regular => {}, + .absolute => try writer.writeAll("ABS, "), + .tlv => try writer.writeAll("THREAD_LOCAL, "), + } + if (info.weak) try writer.writeAll("WEAK"); + if (info.kind != .regular or info.weak) { + try writer.writeAll("] "); + } + try writer.print("{x} ", .{seg.vmaddr + info.vmoffset}); + }, + else => {}, + } + + try writer.print("{s}\n", .{exp.name}); } } - } - fn dumpExportsTrie( - gpa: Allocator, - data: []const u8, - seg: macho.segment_command_64, - writer: anytype, - ) !void { - var arena = std.heap.ArenaAllocator.init(gpa); - defer arena.deinit(); + const TrieIterator = struct { + data: []const u8, + pos: usize = 0, - var exports = std.ArrayList(Export).init(arena.allocator()); - var it = TrieIterator{ .data = data }; - try parseTrieNode(arena.allocator(), &it, "", &exports); + fn getStream(it: *TrieIterator) std.io.FixedBufferStream([]const u8) { + return std.io.fixedBufferStream(it.data[it.pos..]); + } - mem.sort(Export, exports.items, {}, Export.lessThan); + fn readULEB128(it: *TrieIterator) !u64 { + var stream = it.getStream(); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + const value = try std.leb.readULEB128(u64, reader); + it.pos += creader.bytes_read; + return value; + } - for (exports.items) |exp| { - switch (exp.tag) { - .@"export" => { - const info = exp.data.@"export"; - if (info.kind != .regular or info.weak) { - try writer.writeByte('['); - } - switch (info.kind) { - .regular => {}, - .absolute => try writer.writeAll("ABS, "), - .tlv => try writer.writeAll("THREAD_LOCAL, "), - } - if (info.weak) try writer.writeAll("WEAK"); - if (info.kind != .regular or info.weak) { - try writer.writeAll("] "); - } - try writer.print("{x} ", .{seg.vmaddr + info.vmoffset}); + fn readString(it: *TrieIterator) ![:0]const u8 { + var stream = it.getStream(); + const reader = stream.reader(); + + var count: usize = 0; + while (true) : (count += 1) { + const byte = try reader.readByte(); + if (byte == 0) break; + } + + const str = @as([*:0]const u8, @ptrCast(it.data.ptr + it.pos))[0..count :0]; + it.pos += count + 1; + return str; + } + + fn readByte(it: *TrieIterator) !u8 { + var stream = it.getStream(); + const value = try stream.reader().readByte(); + it.pos += 1; + return value; + } + }; + + const Export = struct { + name: []const u8, + tag: enum { @"export", reexport, stub_resolver }, + data: union { + @"export": struct { + kind: enum { regular, absolute, tlv }, + weak: bool = false, + vmoffset: u64, }, - else => {}, + reexport: u64, + stub_resolver: struct { + stub_offset: u64, + resolver_offset: u64, + }, + }, + + inline fn rankByTag(@"export": Export) u3 { + return switch (@"export".tag) { + .@"export" => 1, + .reexport => 2, + .stub_resolver => 3, + }; } - try writer.print("{s}\n", .{exp.name}); - } - } + fn lessThan(ctx: void, lhs: Export, rhs: Export) bool { + _ = ctx; + if (lhs.rankByTag() == rhs.rankByTag()) { + return switch (lhs.tag) { + .@"export" => lhs.data.@"export".vmoffset < rhs.data.@"export".vmoffset, + .reexport => lhs.data.reexport < rhs.data.reexport, + .stub_resolver => lhs.data.stub_resolver.stub_offset < rhs.data.stub_resolver.stub_offset, + }; + } + return lhs.rankByTag() < rhs.rankByTag(); + } + }; - const TrieIterator = struct { - data: []const u8, - pos: usize = 0, + fn parseTrieNode( + arena: Allocator, + it: *TrieIterator, + prefix: []const u8, + exports: *std.ArrayList(Export), + ) !void { + const size = try it.readULEB128(); + if (size > 0) { + const flags = try it.readULEB128(); + switch (flags) { + macho.EXPORT_SYMBOL_FLAGS_REEXPORT => { + const ord = try it.readULEB128(); + const name = try arena.dupe(u8, try it.readString()); + try exports.append(.{ + .name = if (name.len > 0) name else prefix, + .tag = .reexport, + .data = .{ .reexport = ord }, + }); + }, + macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER => { + const stub_offset = try it.readULEB128(); + const resolver_offset = try it.readULEB128(); + try exports.append(.{ + .name = prefix, + .tag = .stub_resolver, + .data = .{ .stub_resolver = .{ + .stub_offset = stub_offset, + .resolver_offset = resolver_offset, + } }, + }); + }, + else => { + const vmoff = try it.readULEB128(); + try exports.append(.{ + .name = prefix, + .tag = .@"export", + .data = .{ .@"export" = .{ + .kind = switch (flags & macho.EXPORT_SYMBOL_FLAGS_KIND_MASK) { + macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR => .regular, + macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE => .absolute, + macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL => .tlv, + else => unreachable, + }, + .weak = flags & macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION != 0, + .vmoffset = vmoff, + } }, + }); + }, + } + } - fn getStream(it: *TrieIterator) std.io.FixedBufferStream([]const u8) { - return std.io.fixedBufferStream(it.data[it.pos..]); + const nedges = try it.readByte(); + for (0..nedges) |_| { + const label = try it.readString(); + const off = try it.readULEB128(); + const prefix_label = try std.fmt.allocPrint(arena, "{s}{s}", .{ prefix, label }); + const curr = it.pos; + it.pos = off; + try parseTrieNode(arena, it, prefix_label, exports); + it.pos = curr; + } } + }; - fn readULEB128(it: *TrieIterator) !u64 { - var stream = it.getStream(); - var creader = std.io.countingReader(stream.reader()); - const reader = creader.reader(); - const value = try std.leb.readULEB128(u64, reader); - it.pos += creader.bytes_read; - return value; + fn parseAndDumpObject(step: *Step, check: Check, bytes: []const u8) ![]const u8 { + const gpa = step.owner.allocator; + const hdr = @as(*align(1) const macho.mach_header_64, @ptrCast(bytes.ptr)).*; + if (hdr.magic != macho.MH_MAGIC_64) { + return error.InvalidMagicNumber; } - fn readString(it: *TrieIterator) ![:0]const u8 { - var stream = it.getStream(); - const reader = stream.reader(); + var ctx = ObjectContext{ .gpa = gpa, .data = bytes, .header = hdr }; + try ctx.parse(); - var count: usize = 0; - while (true) : (count += 1) { - const byte = try reader.readByte(); - if (byte == 0) break; - } + var output = std.ArrayList(u8).init(gpa); + const writer = output.writer(); - const str = @as([*:0]const u8, @ptrCast(it.data.ptr + it.pos))[0..count :0]; - it.pos += count + 1; - return str; - } + switch (check.kind) { + .headers => { + try ObjectContext.dumpHeader(ctx.header, writer); - fn readByte(it: *TrieIterator) !u8 { - var stream = it.getStream(); - const value = try stream.reader().readByte(); - it.pos += 1; - return value; - } - }; + var it = ctx.getLoadCommandIterator(); + var i: usize = 0; + while (it.next()) |cmd| { + try ObjectContext.dumpLoadCommand(cmd, i, writer); + try writer.writeByte('\n'); - const Export = struct { - name: []const u8, - tag: enum { @"export", reexport, stub_resolver }, - data: union { - @"export": struct { - kind: enum { regular, absolute, tlv }, - weak: bool = false, - vmoffset: u64, - }, - reexport: u64, - stub_resolver: struct { - stub_offset: u64, - resolver_offset: u64, + i += 1; + } }, - }, - inline fn rankByTag(@"export": Export) u3 { - return switch (@"export".tag) { - .@"export" => 1, - .reexport => 2, - .stub_resolver => 3, - }; - } + .symtab => if (ctx.symtab.items.len > 0) { + try ctx.dumpSymtab(writer); + } else return step.fail("no symbol table found", .{}), - fn lessThan(ctx: void, lhs: Export, rhs: Export) bool { - _ = ctx; - if (lhs.rankByTag() == rhs.rankByTag()) { - return switch (lhs.tag) { - .@"export" => lhs.data.@"export".vmoffset < rhs.data.@"export".vmoffset, - .reexport => lhs.data.reexport < rhs.data.reexport, - .stub_resolver => lhs.data.stub_resolver.stub_offset < rhs.data.stub_resolver.stub_offset, - }; - } - return lhs.rankByTag() < rhs.rankByTag(); - } - }; + .indirect_symtab => if (ctx.symtab.items.len > 0 and ctx.indsymtab.items.len > 0) { + try ctx.dumpIndirectSymtab(writer); + } else return step.fail("no indirect symbol table found", .{}), - fn parseTrieNode( - arena: Allocator, - it: *TrieIterator, - prefix: []const u8, - exports: *std.ArrayList(Export), - ) !void { - const size = try it.readULEB128(); - if (size > 0) { - const flags = try it.readULEB128(); - switch (flags) { - macho.EXPORT_SYMBOL_FLAGS_REEXPORT => { - const ord = try it.readULEB128(); - const name = try arena.dupe(u8, try it.readString()); - try exports.append(.{ - .name = if (name.len > 0) name else prefix, - .tag = .reexport, - .data = .{ .reexport = ord }, - }); - }, - macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER => { - const stub_offset = try it.readULEB128(); - const resolver_offset = try it.readULEB128(); - try exports.append(.{ - .name = prefix, - .tag = .stub_resolver, - .data = .{ .stub_resolver = .{ - .stub_offset = stub_offset, - .resolver_offset = resolver_offset, - } }, - }); - }, - else => { - const vmoff = try it.readULEB128(); - try exports.append(.{ - .name = prefix, - .tag = .@"export", - .data = .{ .@"export" = .{ - .kind = switch (flags & macho.EXPORT_SYMBOL_FLAGS_KIND_MASK) { - macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR => .regular, - macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE => .absolute, - macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL => .tlv, - else => unreachable, - }, - .weak = flags & macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION != 0, - .vmoffset = vmoff, - } }, - }); - }, - } - } + .dyld_rebase, + .dyld_bind, + .dyld_weak_bind, + .dyld_lazy_bind, + => { + const cmd = ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse + return step.fail("no dyld info found", .{}); + const lc = cmd.cast(macho.dyld_info_command).?; - const nedges = try it.readByte(); - for (0..nedges) |_| { - const label = try it.readString(); - const off = try it.readULEB128(); - const prefix_label = try std.fmt.allocPrint(arena, "{s}{s}", .{ prefix, label }); - const curr = it.pos; - it.pos = off; - try parseTrieNode(arena, it, prefix_label, exports); - it.pos = curr; + switch (check.kind) { + .dyld_rebase => if (lc.rebase_size > 0) { + const data = ctx.data[lc.rebase_off..][0..lc.rebase_size]; + try writer.writeAll(dyld_rebase_label ++ "\n"); + try ctx.dumpRebaseInfo(data, writer); + } else return step.fail("no rebase data found", .{}), + + .dyld_bind => if (lc.bind_size > 0) { + const data = ctx.data[lc.bind_off..][0..lc.bind_size]; + try writer.writeAll(dyld_bind_label ++ "\n"); + try ctx.dumpBindInfo(data, writer); + } else return step.fail("no bind data found", .{}), + + .dyld_weak_bind => if (lc.weak_bind_size > 0) { + const data = ctx.data[lc.weak_bind_off..][0..lc.weak_bind_size]; + try writer.writeAll(dyld_weak_bind_label ++ "\n"); + try ctx.dumpBindInfo(data, writer); + } else return step.fail("no weak bind data found", .{}), + + .dyld_lazy_bind => if (lc.lazy_bind_size > 0) { + const data = ctx.data[lc.lazy_bind_off..][0..lc.lazy_bind_size]; + try writer.writeAll(dyld_lazy_bind_label ++ "\n"); + try ctx.dumpBindInfo(data, writer); + } else return step.fail("no lazy bind data found", .{}), + + else => unreachable, + } + }, + + .exports => blk: { + if (ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| { + const lc = cmd.cast(macho.dyld_info_command).?; + if (lc.export_size > 0) { + const data = ctx.data[lc.export_off..][0..lc.export_size]; + try writer.writeAll(exports_label ++ "\n"); + try ctx.dumpExportsTrie(data, writer); + break :blk; + } + } + return step.fail("no exports data found", .{}); + }, + + else => return step.fail("invalid check kind for MachO file format: {s}", .{@tagName(check.kind)}), } + + return output.toOwnedSlice(); } }; |
