allocator: Allocator, dwarf: Dwarf, file: fs.File, symtab_cmd: macho.symtab_command = .{}, segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, sections: std.ArrayListUnmanaged(macho.section_64) = .{}, dwarf_segment_cmd_index: ?u8 = null, linkedit_segment_cmd_index: ?u8 = null, debug_info_section_index: ?u8 = null, debug_abbrev_section_index: ?u8 = null, debug_str_section_index: ?u8 = null, debug_aranges_section_index: ?u8 = null, debug_line_section_index: ?u8 = null, debug_string_table_dirty: bool = false, debug_abbrev_section_dirty: bool = false, debug_aranges_section_dirty: bool = false, debug_info_header_dirty: bool = false, debug_line_header_dirty: bool = false, strtab: StringTable(.strtab) = .{}, relocs: std.ArrayListUnmanaged(Reloc) = .{}, pub const Reloc = struct { type: enum { direct_load, got_load, }, target: u32, offset: u64, addend: u32, prev_vaddr: u64, }; /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. pub fn populateMissingMetadata(self: *DebugSymbols, macho_file: *MachO) !void { if (self.dwarf_segment_cmd_index == null) { self.dwarf_segment_cmd_index = @as(u8, @intCast(self.segments.items.len)); const page_size = MachO.getPageSize(macho_file.base.options.target.cpu.arch); const off = @as(u64, @intCast(page_size)); const ideal_size: u16 = 200 + 128 + 160 + 250; const needed_size = mem.alignForward(u64, padToIdeal(ideal_size), page_size); log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size }); try self.segments.append(self.allocator, .{ .segname = makeStaticString("__DWARF"), .vmsize = needed_size, .fileoff = off, .filesize = needed_size, .cmdsize = @sizeOf(macho.segment_command_64), }); } if (self.debug_str_section_index == null) { assert(self.dwarf.strtab.buffer.items.len == 0); try self.dwarf.strtab.buffer.append(self.allocator, 0); self.debug_str_section_index = try self.allocateSection( "__debug_str", @as(u32, @intCast(self.dwarf.strtab.buffer.items.len)), 0, ); self.debug_string_table_dirty = true; } if (self.debug_info_section_index == null) { self.debug_info_section_index = try self.allocateSection("__debug_info", 200, 0); self.debug_info_header_dirty = true; } if (self.debug_abbrev_section_index == null) { self.debug_abbrev_section_index = try self.allocateSection("__debug_abbrev", 128, 0); self.debug_abbrev_section_dirty = true; } if (self.debug_aranges_section_index == null) { self.debug_aranges_section_index = try self.allocateSection("__debug_aranges", 160, 4); self.debug_aranges_section_dirty = true; } if (self.debug_line_section_index == null) { self.debug_line_section_index = try self.allocateSection("__debug_line", 250, 0); self.debug_line_header_dirty = true; } if (self.linkedit_segment_cmd_index == null) { self.linkedit_segment_cmd_index = @as(u8, @intCast(self.segments.items.len)); try self.segments.append(self.allocator, .{ .segname = makeStaticString("__LINKEDIT"), .maxprot = macho.PROT.READ, .initprot = macho.PROT.READ, .cmdsize = @sizeOf(macho.segment_command_64), }); } } fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u8 { const segment = self.getDwarfSegmentPtr(); var sect = macho.section_64{ .sectname = makeStaticString(sectname), .segname = segment.segname, .size = @as(u32, @intCast(size)), .@"align" = alignment, }; const alignment_pow_2 = try math.powi(u32, 2, alignment); const off = self.findFreeSpace(size, alignment_pow_2); log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ sect.segName(), sect.sectName(), off, off + size, }); sect.offset = @as(u32, @intCast(off)); const index = @as(u8, @intCast(self.sections.items.len)); try self.sections.append(self.allocator, sect); segment.cmdsize += @sizeOf(macho.section_64); segment.nsects += 1; return index; } pub fn growSection(self: *DebugSymbols, sect_index: u8, needed_size: u32, requires_file_copy: bool) !void { const sect = self.getSectionPtr(sect_index); if (needed_size > self.allocatedSize(sect.offset)) { const existing_size = sect.size; sect.size = 0; // free the space const new_offset = self.findFreeSpace(needed_size, 1); log.debug("moving {s} section: {} bytes from 0x{x} to 0x{x}", .{ sect.sectName(), existing_size, sect.offset, new_offset, }); if (requires_file_copy) { const amt = try self.file.copyRangeAll( sect.offset, self.file, new_offset, existing_size, ); if (amt != existing_size) return error.InputOutput; } sect.offset = @as(u32, @intCast(new_offset)); } sect.size = needed_size; self.markDirty(sect_index); } pub fn markDirty(self: *DebugSymbols, sect_index: u8) void { if (self.debug_info_section_index.? == sect_index) { self.debug_info_header_dirty = true; } else if (self.debug_line_section_index.? == sect_index) { self.debug_line_header_dirty = true; } else if (self.debug_abbrev_section_index.? == sect_index) { self.debug_abbrev_section_dirty = true; } else if (self.debug_str_section_index.? == sect_index) { self.debug_string_table_dirty = true; } else if (self.debug_aranges_section_index.? == sect_index) { self.debug_aranges_section_dirty = true; } } fn detectAllocCollision(self: *DebugSymbols, start: u64, size: u64) ?u64 { const end = start + padToIdeal(size); for (self.sections.items) |section| { const increased_size = padToIdeal(section.size); const test_end = section.offset + increased_size; if (end > section.offset and start < test_end) { return test_end; } } return null; } fn findFreeSpace(self: *DebugSymbols, object_size: u64, min_alignment: u64) u64 { const segment = self.getDwarfSegmentPtr(); var offset: u64 = segment.fileoff; while (self.detectAllocCollision(offset, object_size)) |item_end| { offset = mem.alignForward(u64, item_end, min_alignment); } return offset; } pub fn flushModule(self: *DebugSymbols, macho_file: *MachO) !void { // TODO This linker code currently assumes there is only 1 compilation unit // and it corresponds to the Zig source code. const options = macho_file.base.options; const module = options.module orelse return error.LinkingWithoutZigSourceUnimplemented; for (self.relocs.items) |*reloc| { const sym = switch (reloc.type) { .direct_load => macho_file.getSymbol(.{ .sym_index = reloc.target }), .got_load => blk: { const got_index = macho_file.got_table.lookup.get(.{ .sym_index = reloc.target }).?; const got_entry = macho_file.got_table.entries.items[got_index]; break :blk macho_file.getSymbol(got_entry); }, }; if (sym.n_value == reloc.prev_vaddr) continue; const sym_name = switch (reloc.type) { .direct_load => macho_file.getSymbolName(.{ .sym_index = reloc.target }), .got_load => blk: { const got_index = macho_file.got_table.lookup.get(.{ .sym_index = reloc.target }).?; const got_entry = macho_file.got_table.entries.items[got_index]; break :blk macho_file.getSymbolName(got_entry); }, }; const sect = &self.sections.items[self.debug_info_section_index.?]; const file_offset = sect.offset + reloc.offset; log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{ reloc.target, sym.n_value, sym_name, file_offset, }); try self.file.pwriteAll(mem.asBytes(&sym.n_value), file_offset); reloc.prev_vaddr = sym.n_value; } if (self.debug_abbrev_section_dirty) { try self.dwarf.writeDbgAbbrev(); self.debug_abbrev_section_dirty = false; } if (self.debug_info_header_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. const text_section = macho_file.sections.items(.header)[macho_file.text_section_index.?]; const low_pc = text_section.addr; const high_pc = text_section.addr + text_section.size; try self.dwarf.writeDbgInfoHeader(module, low_pc, high_pc); self.debug_info_header_dirty = false; } if (self.debug_aranges_section_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. const text_section = macho_file.sections.items(.header)[macho_file.text_section_index.?]; try self.dwarf.writeDbgAranges(text_section.addr, text_section.size); self.debug_aranges_section_dirty = false; } if (self.debug_line_header_dirty) { try self.dwarf.writeDbgLineHeader(); self.debug_line_header_dirty = false; } { const sect_index = self.debug_str_section_index.?; if (self.debug_string_table_dirty or self.dwarf.strtab.buffer.items.len != self.getSection(sect_index).size) { const needed_size = @as(u32, @intCast(self.dwarf.strtab.buffer.items.len)); try self.growSection(sect_index, needed_size, false); try self.file.pwriteAll(self.dwarf.strtab.buffer.items, self.getSection(sect_index).offset); self.debug_string_table_dirty = false; } } self.finalizeDwarfSegment(macho_file); try self.writeLinkeditSegmentData(macho_file); // Write load commands var lc_buffer = std.ArrayList(u8).init(self.allocator); defer lc_buffer.deinit(); const lc_writer = lc_buffer.writer(); try self.writeSegmentHeaders(macho_file, lc_writer); try lc_writer.writeStruct(self.symtab_cmd); try lc_writer.writeStruct(macho_file.uuid_cmd); const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); try self.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); try self.writeHeader(macho_file, ncmds, @as(u32, @intCast(lc_buffer.items.len))); assert(!self.debug_abbrev_section_dirty); assert(!self.debug_aranges_section_dirty); assert(!self.debug_string_table_dirty); } pub fn deinit(self: *DebugSymbols) void { const gpa = self.allocator; self.file.close(); self.segments.deinit(gpa); self.sections.deinit(gpa); self.dwarf.deinit(); self.strtab.deinit(gpa); self.relocs.deinit(gpa); } pub fn swapRemoveRelocs(self: *DebugSymbols, target: u32) void { // TODO re-implement using a hashmap with free lists var last_index: usize = 0; while (last_index < self.relocs.items.len) { const reloc = self.relocs.items[last_index]; if (reloc.target == target) { _ = self.relocs.swapRemove(last_index); } else { last_index += 1; } } } fn finalizeDwarfSegment(self: *DebugSymbols, macho_file: *MachO) void { const base_vmaddr = blk: { // Note that we purposely take the last VM address of the MachO binary including // the binary's LINKEDIT segment. This is in contrast to how dsymutil does it // which overwrites the the address space taken by the original MachO binary, // however at the cost of having LINKEDIT preceed DWARF in dSYM binary which we // do not want as we want to be able to incrementally move DWARF sections in the // file as we please. const last_seg = macho_file.getLinkeditSegmentPtr(); break :blk last_seg.vmaddr + last_seg.vmsize; }; const dwarf_segment = self.getDwarfSegmentPtr(); var file_size: u64 = 0; for (self.sections.items) |header| { file_size = @max(file_size, header.offset + header.size); } const page_size = MachO.getPageSize(macho_file.base.options.target.cpu.arch); const aligned_size = mem.alignForward(u64, file_size, page_size); dwarf_segment.vmaddr = base_vmaddr; dwarf_segment.filesize = aligned_size; dwarf_segment.vmsize = aligned_size; const linkedit = self.getLinkeditSegmentPtr(); linkedit.vmaddr = mem.alignForward( u64, dwarf_segment.vmaddr + aligned_size, page_size, ); linkedit.fileoff = mem.alignForward( u64, dwarf_segment.fileoff + aligned_size, page_size, ); log.debug("found __LINKEDIT segment free space at 0x{x}", .{linkedit.fileoff}); } fn writeSegmentHeaders(self: *DebugSymbols, macho_file: *MachO, writer: anytype) !void { // Write segment/section headers from the binary file first. const end = macho_file.linkedit_segment_cmd_index.?; for (macho_file.segments.items[0..end], 0..) |seg, i| { const indexes = macho_file.getSectionIndexes(@as(u8, @intCast(i))); var out_seg = seg; out_seg.fileoff = 0; out_seg.filesize = 0; out_seg.cmdsize = @sizeOf(macho.segment_command_64); out_seg.nsects = 0; // Update section headers count; any section with size of 0 is excluded // since it doesn't have any data in the final binary file. for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |header| { if (header.size == 0) continue; out_seg.cmdsize += @sizeOf(macho.section_64); out_seg.nsects += 1; } if (out_seg.nsects == 0 and (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or mem.eql(u8, out_seg.segName(), "__DATA"))) continue; try writer.writeStruct(out_seg); for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |header| { if (header.size == 0) continue; var out_header = header; out_header.offset = 0; try writer.writeStruct(out_header); } } // Next, commit DSYM's __LINKEDIT and __DWARF segments headers. for (self.segments.items, 0..) |seg, i| { const indexes = self.getSectionIndexes(@as(u8, @intCast(i))); try writer.writeStruct(seg); for (self.sections.items[indexes.start..indexes.end]) |header| { try writer.writeStruct(header); } } } fn writeHeader(self: *DebugSymbols, macho_file: *MachO, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.filetype = macho.MH_DSYM; switch (macho_file.base.options.target.cpu.arch) { .aarch64 => { header.cputype = macho.CPU_TYPE_ARM64; header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; }, .x86_64 => { header.cputype = macho.CPU_TYPE_X86_64; header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; }, else => return error.UnsupportedCpuArchitecture, } header.ncmds = ncmds; header.sizeofcmds = sizeofcmds; log.debug("writing Mach-O header {}", .{header}); try self.file.pwriteAll(mem.asBytes(&header), 0); } fn allocatedSize(self: *DebugSymbols, start: u64) u64 { const seg = self.getDwarfSegmentPtr(); assert(start >= seg.fileoff); var min_pos: u64 = std.math.maxInt(u64); for (self.sections.items) |section| { if (section.offset <= start) continue; if (section.offset < min_pos) min_pos = section.offset; } return min_pos - start; } fn writeLinkeditSegmentData(self: *DebugSymbols, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); try self.writeSymtab(macho_file); try self.writeStrtab(); const page_size = MachO.getPageSize(macho_file.base.options.target.cpu.arch); const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; const aligned_size = mem.alignForward(u64, seg.filesize, page_size); seg.vmsize = aligned_size; } fn writeSymtab(self: *DebugSymbols, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.allocator; var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); for (macho_file.locals.items, 0..) |sym, sym_id| { if (sym.n_strx == 0) continue; // no name, skip const sym_loc = MachO.SymbolWithLoc{ .sym_index = @as(u32, @intCast(sym_id)) }; if (macho_file.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip if (macho_file.getGlobal(macho_file.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip var out_sym = sym; out_sym.n_strx = try self.strtab.insert(gpa, macho_file.getSymbolName(sym_loc)); try locals.append(out_sym); } var exports = std.ArrayList(macho.nlist_64).init(gpa); defer exports.deinit(); for (macho_file.globals.items) |global| { const sym = macho_file.getSymbol(global); if (sym.undf()) continue; // import, skip var out_sym = sym; out_sym.n_strx = try self.strtab.insert(gpa, macho_file.getSymbolName(global)); try exports.append(out_sym); } const nlocals = locals.items.len; const nexports = exports.items.len; const nsyms = nlocals + nexports; const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; const offset = mem.alignForward(u64, seg.fileoff, @alignOf(macho.nlist_64)); const needed_size = nsyms * @sizeOf(macho.nlist_64); seg.filesize = offset + needed_size - seg.fileoff; self.symtab_cmd.symoff = @as(u32, @intCast(offset)); self.symtab_cmd.nsyms = @as(u32, @intCast(nsyms)); const locals_off = @as(u32, @intCast(offset)); const locals_size = nlocals * @sizeOf(macho.nlist_64); const exports_off = locals_off + locals_size; const exports_size = nexports * @sizeOf(macho.nlist_64); log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); try self.file.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); } fn writeStrtab(self: *DebugSymbols) !void { const tracy = trace(@src()); defer tracy.end(); const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; const symtab_size = @as(u32, @intCast(self.symtab_cmd.nsyms * @sizeOf(macho.nlist_64))); const offset = mem.alignForward(u64, self.symtab_cmd.symoff + symtab_size, @alignOf(u64)); const needed_size = mem.alignForward(u64, self.strtab.buffer.items.len, @alignOf(u64)); seg.filesize = offset + needed_size - seg.fileoff; self.symtab_cmd.stroff = @as(u32, @intCast(offset)); self.symtab_cmd.strsize = @as(u32, @intCast(needed_size)); log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); try self.file.pwriteAll(self.strtab.buffer.items, offset); if (self.strtab.buffer.items.len < needed_size) { // Ensure we are always padded to the actual length of the file. try self.file.pwriteAll(&[_]u8{0}, offset + needed_size); } } pub fn getSectionIndexes(self: *DebugSymbols, segment_index: u8) struct { start: u8, end: u8 } { var start: u8 = 0; const nsects = for (self.segments.items, 0..) |seg, i| { if (i == segment_index) break @as(u8, @intCast(seg.nsects)); start += @as(u8, @intCast(seg.nsects)); } else 0; return .{ .start = start, .end = start + nsects }; } fn getDwarfSegmentPtr(self: *DebugSymbols) *macho.segment_command_64 { const index = self.dwarf_segment_cmd_index.?; return &self.segments.items[index]; } fn getLinkeditSegmentPtr(self: *DebugSymbols) *macho.segment_command_64 { const index = self.linkedit_segment_cmd_index.?; return &self.segments.items[index]; } pub fn getSectionPtr(self: *DebugSymbols, sect: u8) *macho.section_64 { assert(sect < self.sections.items.len); return &self.sections.items[sect]; } pub fn getSection(self: DebugSymbols, sect: u8) macho.section_64 { assert(sect < self.sections.items.len); return self.sections.items[sect]; } const DebugSymbols = @This(); const std = @import("std"); const build_options = @import("build_options"); const assert = std.debug.assert; const fs = std.fs; const link = @import("../../link.zig"); const load_commands = @import("load_commands.zig"); const log = std.log.scoped(.dsym); const macho = std.macho; const makeStaticString = MachO.makeStaticString; const math = std.math; const mem = std.mem; const padToIdeal = MachO.padToIdeal; const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Dwarf = @import("../Dwarf.zig"); const MachO = @import("../MachO.zig"); const Module = @import("../../Module.zig"); const StringTable = @import("../strtab.zig").StringTable; const Type = @import("../../type.zig").Type;