//! Represents an input relocatable Object file.
//! Each Object is fully loaded into memory for easier
//! access into different data within.

name: []const u8,
mtime: u64,
contents: []align(@alignOf(u64)) const u8,

header: macho.mach_header_64 = undefined,

/// Symtab and strtab might not exist for empty object files so we use an optional
/// to signal this.
in_symtab: ?[]align(1) const macho.nlist_64 = null,
in_strtab: ?[]const u8 = null,

/// Output symtab is sorted so that we can easily reference symbols following each
/// other in address space.
/// The output symtab is at least as long as the input symtab; any excess entries
/// are trailing section alias symbols.
symtab: []macho.nlist_64 = undefined,
/// Can be undefined as set together with in_symtab.
source_symtab_lookup: []u32 = undefined,
/// Can be undefined as set together with in_symtab.
reverse_symtab_lookup: []u32 = undefined,
/// Can be undefined as set together with in_symtab.
source_address_lookup: []i64 = undefined,
/// Can be undefined as set together with in_symtab.
source_section_index_lookup: []Entry = undefined,
/// Can be undefined as set together with in_symtab.
strtab_lookup: []u32 = undefined,
/// Can be undefined as set together with in_symtab.
atom_by_index_table: []?Atom.Index = undefined,
/// Can be undefined as set together with in_symtab.
globals_lookup: []i64 = undefined,
/// Can be undefined as set together with in_symtab.
relocs_lookup: []Entry = undefined,

/// All relocations flattened into one array, sorted by address descending
/// per section.
relocations: std.ArrayListUnmanaged(macho.relocation_info) = .{},
/// Beginning index into the relocations array for each input section
/// defined within this Object file.
section_relocs_lookup: std.ArrayListUnmanaged(u32) = .{},

/// Data-in-code records sorted by address.
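/// Populated from the LC_DATA_IN_CODE load command, if present (see parseDataInCode).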
data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},

atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
exec_atoms: std.ArrayListUnmanaged(Atom.Index) = .{},

eh_frame_sect_id: ?u8 = null,
eh_frame_relocs_lookup: std.AutoArrayHashMapUnmanaged(u32, Record) = .{},
eh_frame_records_lookup: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{},

unwind_info_sect_id: ?u8 = null,
unwind_relocs_lookup: []Record = undefined,
unwind_records_lookup: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{},

const Entry = struct {
    start: u32 = 0,
    len: u32 = 0,
};

const Record = struct {
    dead: bool,
    reloc: Entry,
};

pub fn isObject(file: std.fs.File) bool {
    const reader = file.reader();
    const hdr = reader.readStruct(macho.mach_header_64) catch return false;
    defer file.seekTo(0) catch {};
    return hdr.filetype == macho.MH_OBJECT;
}

pub fn deinit(self: *Object, gpa: Allocator) void {
    self.atoms.deinit(gpa);
    self.exec_atoms.deinit(gpa);
    gpa.free(self.name);
    gpa.free(self.contents);
    if (self.in_symtab) |_| {
        gpa.free(self.source_symtab_lookup);
        gpa.free(self.reverse_symtab_lookup);
        gpa.free(self.source_address_lookup);
        gpa.free(self.source_section_index_lookup);
        gpa.free(self.strtab_lookup);
        gpa.free(self.symtab);
        gpa.free(self.atom_by_index_table);
        gpa.free(self.globals_lookup);
        gpa.free(self.relocs_lookup);
    }
    self.eh_frame_relocs_lookup.deinit(gpa);
    self.eh_frame_records_lookup.deinit(gpa);
    if (self.hasUnwindRecords()) {
        gpa.free(self.unwind_relocs_lookup);
    }
    self.unwind_records_lookup.deinit(gpa);
    self.relocations.deinit(gpa);
    self.section_relocs_lookup.deinit(gpa);
    self.data_in_code.deinit(gpa);
}

pub fn parse(self: *Object, allocator: Allocator) !void {
    var stream = std.io.fixedBufferStream(self.contents);
    const reader = stream.reader();

    self.header = try reader.readStruct(macho.mach_header_64);

    var it = LoadCommandIterator{
        .ncmds = self.header.ncmds,
        .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
    };

    const nsects = self.getSourceSections().len;

    // Prepopulate the per-section relocations lookup table.
    try self.section_relocs_lookup.resize(allocator, nsects);
    @memset(self.section_relocs_lookup.items, 0);

    // Parse symtab.
    const symtab = while (it.next()) |cmd| switch (cmd.cmd()) {
        .SYMTAB => break cmd.cast(macho.symtab_command).?,
        else => {},
    } else return;

    self.in_symtab = @as(
        [*]align(1) const macho.nlist_64,
        @ptrCast(self.contents.ptr + symtab.symoff),
    )[0..symtab.nsyms];
    self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize];

    self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects);
    self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
    self.reverse_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
    self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
    self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
    self.atom_by_index_table = try allocator.alloc(?Atom.Index, self.in_symtab.?.len + nsects);
    self.relocs_lookup = try allocator.alloc(Entry, self.in_symtab.?.len + nsects);
    // This is wasteful but we need to be able to look up a source symbol's address
    // after sections have been stripped and allocated.
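    // Undefined symbols are recorded with a sentinel address of -1; the address
    // lookups special-case that sentinel (see getSymbolByAddress).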
    self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
    self.source_section_index_lookup = try allocator.alloc(Entry, nsects);

    for (self.symtab) |*sym| {
        sym.* = .{
            .n_value = 0,
            .n_sect = 0,
            .n_desc = 0,
            .n_strx = 0,
            .n_type = 0,
        };
    }

    @memset(self.globals_lookup, -1);
    @memset(self.atom_by_index_table, null);
    @memset(self.source_section_index_lookup, .{});
    @memset(self.relocs_lookup, .{});

    // You would expect the symbol table to be at least pre-sorted by symbol type:
    // local < extern defined < undefined. Unfortunately, this is not guaranteed!
    // For instance, the Go compiler does not necessarily respect it, so we sort
    // immediately by type and by address within each group.
    var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(allocator, self.in_symtab.?.len);
    defer sorted_all_syms.deinit();

    for (self.in_symtab.?, 0..) |_, index| {
        sorted_all_syms.appendAssumeCapacity(.{ .index = @as(u32, @intCast(index)) });
    }

    // We sort by type: defined < undefined, and afterwards by address within each
    // group. Normally, the dysymtab should be enough to guarantee the sort, but it
    // turns out not every compiler is kind enough to specify the symbols in the
    // correct order.
    mem.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan);

    var prev_sect_id: u8 = 0;
    var section_index_lookup: ?Entry = null;
    for (sorted_all_syms.items, 0..) |sym_id, i| {
        const sym = sym_id.getSymbol(self);

        if (section_index_lookup) |*lookup| {
            if (sym.n_sect != prev_sect_id or sym.undf()) {
                self.source_section_index_lookup[prev_sect_id - 1] = lookup.*;
                section_index_lookup = null;
            } else {
                lookup.len += 1;
            }
        }

        if (sym.sect() and section_index_lookup == null) {
            section_index_lookup = .{ .start = @as(u32, @intCast(i)), .len = 1 };
        }

        prev_sect_id = sym.n_sect;

        self.symtab[i] = sym;
        self.source_symtab_lookup[i] = sym_id.index;
        self.reverse_symtab_lookup[sym_id.index] = @as(u32, @intCast(i));
        self.source_address_lookup[i] = if (sym.undf()) -1 else @as(i64, @intCast(sym.n_value));

        const sym_name_len = mem.sliceTo(@as([*:0]const u8, @ptrCast(self.in_strtab.?.ptr + sym.n_strx)), 0).len + 1;
        self.strtab_lookup[i] = @as(u32, @intCast(sym_name_len));
    }

    // If there were no undefined symbols, make sure we populate the
    // source section index lookup for the last scanned section.
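    // (The loop above only flushes a lookup entry when it sees the first symbol of
    // the next section or the first undefined symbol, so flush the final run here.)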
    if (section_index_lookup) |lookup| {
        self.source_section_index_lookup[prev_sect_id - 1] = lookup;
    }

    // Parse the __TEXT,__eh_frame header if one exists.
    self.eh_frame_sect_id = self.getSourceSectionIndexByName("__TEXT", "__eh_frame");

    // Parse the __LD,__compact_unwind header if one exists.
    self.unwind_info_sect_id = self.getSourceSectionIndexByName("__LD", "__compact_unwind");
    if (self.hasUnwindRecords()) {
        self.unwind_relocs_lookup = try allocator.alloc(Record, self.getUnwindRecords().len);
        @memset(self.unwind_relocs_lookup, .{ .dead = true, .reloc = .{} });
    }
}

const SymbolAtIndex = struct {
    index: u32,

    const Context = *const Object;

    fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 {
        return ctx.in_symtab.?[self.index];
    }

    fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 {
        const off = self.getSymbol(ctx).n_strx;
        return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.in_strtab.?.ptr + off)), 0);
    }

    fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 {
        const sym = self.getSymbol(ctx);
        if (!sym.ext()) {
            const sym_name = self.getSymbolName(ctx);
            if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 3;
            return 2;
        }
        if (sym.weakDef() or sym.pext()) return 1;
        return 0;
    }

    /// Performs a lexicographic-like comparison:
    /// * if both lhs and rhs are defined:
    ///   * if lhs.n_value == rhs.n_value:
    ///     * if lhs.n_sect == rhs.n_sect: ext < weak < local < temp
    ///     * else: lhs.n_sect < rhs.n_sect
    ///   * else: lhs.n_value < rhs.n_value
    /// * if both are undefined: order by n_strx
    /// * else: defined sorts before undefined
    fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool {
        const lhs = lhs_index.getSymbol(ctx);
        const rhs = rhs_index.getSymbol(ctx);
        if (lhs.sect() and rhs.sect()) {
            if (lhs.n_value == rhs.n_value) {
                if (lhs.n_sect == rhs.n_sect) {
                    const lhs_senior = lhs_index.getSymbolSeniority(ctx);
                    const rhs_senior = rhs_index.getSymbolSeniority(ctx);
                    if (lhs_senior == rhs_senior) {
                        return lessThanByNStrx(ctx, lhs_index, rhs_index);
                    } else return lhs_senior < rhs_senior;
                } else return lhs.n_sect < rhs.n_sect;
            } else return lhs.n_value < rhs.n_value;
        } else if (lhs.undf() and rhs.undf()) {
            return lessThanByNStrx(ctx, lhs_index, rhs_index);
        } else return rhs.undf();
    }

    fn lessThanByNStrx(ctx: Context, lhs: SymbolAtIndex, rhs: SymbolAtIndex) bool {
        return lhs.getSymbol(ctx).n_strx < rhs.getSymbol(ctx).n_strx;
    }
};

fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct {
    index: u32,
    len: u32,
} {
    const FirstMatch = struct {
        n_sect: u8,

        pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool {
            return symbol.n_sect == pred.n_sect;
        }
    };
    const FirstNonMatch = struct {
        n_sect: u8,

        pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool {
            return symbol.n_sect != pred.n_sect;
        }
    };

    const index = MachO.lsearch(macho.nlist_64, symbols, FirstMatch{
        .n_sect = n_sect,
    });
    const len = MachO.lsearch(macho.nlist_64, symbols[index..], FirstNonMatch{
        .n_sect = n_sect,
    });

    return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) };
}

fn filterSymbolsByAddress(symbols: []macho.nlist_64, start_addr: u64, end_addr: u64) struct {
    index: u32,
    len: u32,
} {
    const Predicate = struct {
        addr: u64,

        pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool {
            return symbol.n_value >= pred.addr;
        }
    };

    const index = MachO.lsearch(macho.nlist_64, symbols, Predicate{
        .addr = start_addr,
    });
    const len = MachO.lsearch(macho.nlist_64, symbols[index..], Predicate{
        .addr = end_addr,
    });

    return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) };
}

const SortedSection = struct {
    header: macho.section_64,
    id: u8,
};
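/// Orders section headers by address, breaking ties by original section index so
/// that the sort remains deterministic for empty or overlapping sections.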
fn sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) bool {
    _ = ctx;
    if (lhs.header.addr == rhs.header.addr) {
        return lhs.id < rhs.id;
    }
    return lhs.header.addr < rhs.header.addr;
}

pub const SplitIntoAtomsError = error{
    OutOfMemory,
    EndOfStream,
    MissingEhFrameSection,
    BadDwarfCfi,
};

pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) SplitIntoAtomsError!void {
    log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name });

    try self.splitRegularSections(macho_file, object_id);
    try self.parseEhFrameSection(macho_file, object_id);
    try self.parseUnwindInfo(macho_file, object_id);
    try self.parseDataInCode(macho_file.base.allocator);
}

/// Splits input regular sections into Atoms.
/// If the Object was compiled with `MH_SUBSECTIONS_VIA_SYMBOLS`, splits each section
/// into subsections where each subsection then represents an Atom.
pub fn splitRegularSections(self: *Object, macho_file: *MachO, object_id: u32) !void {
    const gpa = macho_file.base.allocator;

    const sections = self.getSourceSections();
    for (sections, 0..) |sect, id| {
        if (sect.isDebug()) continue;
        const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse {
            log.debug(" unhandled section '{s},{s}'", .{ sect.segName(), sect.sectName() });
            continue;
        };
        if (sect.size == 0) continue;

        const sect_id = @as(u8, @intCast(id));
        const sym = self.getSectionAliasSymbolPtr(sect_id);
        sym.* = .{
            .n_strx = 0,
            .n_type = macho.N_SECT,
            .n_sect = out_sect_id + 1,
            .n_desc = 0,
            .n_value = sect.addr,
        };
    }

    if (self.in_symtab == null) {
        for (sections, 0..) |sect, id| {
            if (sect.isDebug()) continue;
            const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue;
            if (sect.size == 0) continue;

            const sect_id: u8 = @intCast(id);
            const sym_index = self.getSectionAliasSymbolIndex(sect_id);
            const atom_index = try self.createAtomFromSubsection(
                macho_file,
                object_id,
                sym_index,
                sym_index,
                1,
                sect.size,
                Alignment.fromLog2Units(sect.@"align"),
                out_sect_id,
            );
            macho_file.addAtomToSection(atom_index);
        }
        return;
    }

    // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we
    // have to infer the start of the undef section in the symtab ourselves.
    const iundefsym = blk: {
        const dysymtab = self.getDysymtab() orelse {
            var iundefsym: usize = self.in_symtab.?.len;
            while (iundefsym > 0) : (iundefsym -= 1) {
                const sym = self.symtab[iundefsym - 1];
                if (sym.sect()) break;
            }
            break :blk iundefsym;
        };
        break :blk dysymtab.iundefsym;
    };

    // We only care about defined symbols, so filter all others out.
    const symtab = try gpa.dupe(macho.nlist_64, self.symtab[0..iundefsym]);
    defer gpa.free(symtab);

    const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;

    // Sort section headers by address.
    var sorted_sections = try gpa.alloc(SortedSection, sections.len);
    defer gpa.free(sorted_sections);

    for (sections, 0..) |sect, id| {
        sorted_sections[id] = .{ .header = sect, .id = @as(u8, @intCast(id)) };
    }

    mem.sort(SortedSection, sorted_sections, {}, sectionLessThanByAddress);

    var sect_sym_index: u32 = 0;
    for (sorted_sections) |section| {
        const sect = section.header;
        if (sect.isDebug()) continue;

        const sect_id = section.id;
        log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() });

        // Get output segment/section in the final artifact.
        const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue;
        log.debug(" output sect({d}, '{s},{s}')", .{
            out_sect_id + 1,
            macho_file.sections.items(.header)[out_sect_id].segName(),
            macho_file.sections.items(.header)[out_sect_id].sectName(),
        });

        try self.parseRelocs(gpa, section.id);

        const cpu_arch = macho_file.base.options.target.cpu.arch;
        const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1);
        const sect_start_index = sect_sym_index + sect_loc.index;

        sect_sym_index += sect_loc.len;

        if (sect.size == 0) continue;

        if (subsections_via_symbols and sect_loc.len > 0) {
            // If the first nlist does not match the start of the section,
            // then we need to encapsulate the memory range [section start, first symbol)
            // as a temporary symbol and insert the matching Atom.
            const first_sym = symtab[sect_start_index];
            if (first_sym.n_value > sect.addr) {
                const sym_index = self.getSectionAliasSymbolIndex(sect_id);
                const atom_size = first_sym.n_value - sect.addr;
                const atom_index = try self.createAtomFromSubsection(
                    macho_file,
                    object_id,
                    sym_index,
                    sym_index,
                    1,
                    atom_size,
                    Alignment.fromLog2Units(sect.@"align"),
                    out_sect_id,
                );
                if (!sect.isZerofill()) {
                    try self.cacheRelocs(macho_file, atom_index);
                }
                macho_file.addAtomToSection(atom_index);
            }

            var next_sym_index = sect_start_index;
            while (next_sym_index < sect_start_index + sect_loc.len) {
                const next_sym = symtab[next_sym_index];
                const addr = next_sym.n_value;
                const atom_loc = filterSymbolsByAddress(symtab[next_sym_index..], addr, addr + 1);
                assert(atom_loc.len > 0);
                const atom_sym_index = atom_loc.index + next_sym_index;
                const nsyms_trailing = atom_loc.len;
                next_sym_index += atom_loc.len;

                const atom_size = if (next_sym_index < sect_start_index + sect_loc.len)
                    symtab[next_sym_index].n_value - addr
                else
                    sect.addr + sect.size - addr;

                const atom_align = Alignment.fromLog2Units(if (addr > 0)
                    @min(@ctz(addr), sect.@"align")
                else
                    sect.@"align");

                const atom_index = try self.createAtomFromSubsection(
                    macho_file,
                    object_id,
                    atom_sym_index,
                    atom_sym_index,
                    nsyms_trailing,
                    atom_size,
                    atom_align,
                    out_sect_id,
                );

                // TODO rework this at the relocation level
                if (cpu_arch == .x86_64 and addr == sect.addr) {
                    // In x86_64 relocs, it can happen that the compiler refers to the same
                    // atom by both the actual assigned symbol and the start of the section.
                    // In this case, we need to link the two together, so add an alias.
                    const alias_index = self.getSectionAliasSymbolIndex(sect_id);
                    self.atom_by_index_table[alias_index] = atom_index;
                }
                if (!sect.isZerofill()) {
                    try self.cacheRelocs(macho_file, atom_index);
                }
                macho_file.addAtomToSection(atom_index);
            }
        } else {
            const alias_index = self.getSectionAliasSymbolIndex(sect_id);
            const atom_index = try self.createAtomFromSubsection(
                macho_file,
                object_id,
                alias_index,
                sect_start_index,
                sect_loc.len,
                sect.size,
                Alignment.fromLog2Units(sect.@"align"),
                out_sect_id,
            );
            if (!sect.isZerofill()) {
                try self.cacheRelocs(macho_file, atom_index);
            }
            macho_file.addAtomToSection(atom_index);
        }
    }
}

fn createAtomFromSubsection(
    self: *Object,
    macho_file: *MachO,
    object_id: u32,
    sym_index: u32,
    inner_sym_index: u32,
    inner_nsyms_trailing: u32,
    size: u64,
    alignment: Alignment,
    out_sect_id: u8,
) !Atom.Index {
    const gpa = macho_file.base.allocator;
    const atom_index = try macho_file.createAtom(sym_index, .{
        .size = size,
        .alignment = alignment,
    });
    const atom = macho_file.getAtomPtr(atom_index);
    atom.inner_sym_index = inner_sym_index;
    atom.inner_nsyms_trailing = inner_nsyms_trailing;
    atom.file = object_id + 1;
    self.symtab[sym_index].n_sect = out_sect_id + 1;

    log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{
        sym_index,
        self.getSymbolName(sym_index),
        out_sect_id + 1,
        macho_file.sections.items(.header)[out_sect_id].segName(),
        macho_file.sections.items(.header)[out_sect_id].sectName(),
        object_id,
    });

    try self.atoms.append(gpa, atom_index);
    self.atom_by_index_table[sym_index] = atom_index;

    var it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
    while (it.next()) |sym_loc| {
        const inner = macho_file.getSymbolPtr(sym_loc);
        inner.n_sect = out_sect_id + 1;
        self.atom_by_index_table[sym_loc.sym_index] = atom_index;
    }

    const out_sect = macho_file.sections.items(.header)[out_sect_id];
    if (out_sect.isCode() and
        mem.eql(u8, "__TEXT", out_sect.segName()) and
        mem.eql(u8, "__text", out_sect.sectName()))
    {
        // TODO currently assuming a single section for executable machine code
        try self.exec_atoms.append(gpa, atom_index);
    }

    return atom_index;
}

fn filterRelocs(
    relocs: []align(1) const macho.relocation_info,
    start_addr: u64,
    end_addr: u64,
) Entry {
    const Predicate = struct {
        addr: u64,

        pub fn predicate(self: @This(), rel: macho.relocation_info) bool {
            return rel.r_address >= self.addr;
        }
    };
    const LPredicate = struct {
        addr: u64,

        pub fn predicate(self: @This(), rel: macho.relocation_info) bool {
            return rel.r_address < self.addr;
        }
    };

    const start = MachO.bsearch(macho.relocation_info, relocs, Predicate{ .addr = end_addr });
    const len = MachO.lsearch(macho.relocation_info, relocs[start..], LPredicate{ .addr = start_addr });

    return .{ .start = @as(u32, @intCast(start)), .len = @as(u32, @intCast(len)) };
}

/// Parses all relocs for the input section, and sorts them in descending address order.
/// Previously, I had wrongly assumed that compilers output relocations for each
/// section in a sorted manner, which is simply not true.
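/// On return, `section_relocs_lookup.items[sect_id]` holds the starting index of this
/// section's relocations within the flat `relocations` array; the entry count is the
/// section header's `nreloc` (see getRelocs).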
fn parseRelocs(self: *Object, gpa: Allocator, sect_id: u8) !void {
    const section = self.getSourceSection(sect_id);
    const start = @as(u32, @intCast(self.relocations.items.len));
    if (self.getSourceRelocs(section)) |relocs| {
        try self.relocations.ensureUnusedCapacity(gpa, relocs.len);
        self.relocations.appendUnalignedSliceAssumeCapacity(relocs);
        mem.sort(macho.relocation_info, self.relocations.items[start..], {}, relocGreaterThan);
    }
    self.section_relocs_lookup.items[sect_id] = start;
}

fn cacheRelocs(self: *Object, macho_file: *MachO, atom_index: Atom.Index) !void {
    const atom = macho_file.getAtom(atom_index);

    const source_sect_id = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
        break :blk source_sym.n_sect - 1;
    } else blk: {
        // If there was no matching symbol present in the source symtab, this means
        // we are dealing with either an entire section, or part of it, but also
        // starting at the beginning.
        const nbase = @as(u32, @intCast(self.in_symtab.?.len));
        const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
        break :blk sect_id;
    };

    const source_sect = self.getSourceSection(source_sect_id);
    assert(!source_sect.isZerofill());
    const relocs = self.getRelocs(source_sect_id);

    self.relocs_lookup[atom.sym_index] = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
        const offset = source_sym.n_value - source_sect.addr;
        break :blk filterRelocs(relocs, offset, offset + atom.size);
    } else filterRelocs(relocs, 0, atom.size);
}

fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool {
    _ = ctx;
    return lhs.r_address > rhs.r_address;
}

fn parseEhFrameSection(self: *Object, macho_file: *MachO, object_id: u32) !void {
    const sect_id = self.eh_frame_sect_id orelse return;
    const sect = self.getSourceSection(sect_id);

    log.debug("parsing __TEXT,__eh_frame section", .{});

    const gpa = macho_file.base.allocator;

    if (macho_file.eh_frame_section_index == null) {
        macho_file.eh_frame_section_index = try macho_file.initSection("__TEXT", "__eh_frame", .{});
    }

    const cpu_arch = macho_file.base.options.target.cpu.arch;
    try self.parseRelocs(gpa, sect_id);
    const relocs = self.getRelocs(sect_id);

    var it = self.getEhFrameRecordsIterator();
    var record_count: u32 = 0;
    while (try it.next()) |_| {
        record_count += 1;
    }

    try self.eh_frame_relocs_lookup.ensureTotalCapacity(gpa, record_count);
    try self.eh_frame_records_lookup.ensureUnusedCapacity(gpa, record_count);

    it.reset();

    while (try it.next()) |record| {
        const offset = it.pos - record.getSize();
        const rel_pos: Entry = switch (cpu_arch) {
            .aarch64 => filterRelocs(relocs, offset, offset + record.getSize()),
            .x86_64 => .{},
            else => unreachable,
        };
        self.eh_frame_relocs_lookup.putAssumeCapacityNoClobber(offset, .{
            .dead = false,
            .reloc = rel_pos,
        });

        if (record.tag == .fde) {
            const target = blk: {
                switch (cpu_arch) {
                    .aarch64 => {
                        assert(rel_pos.len > 0); // TODO convert to an error as the FDE eh frame is malformed
                        // Find function symbol that this record describes
                        const rel = for (relocs[rel_pos.start..][0..rel_pos.len]) |rel| {
                            if (rel.r_address - @as(i32, @intCast(offset)) == 8 and
                                @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)) == .ARM64_RELOC_UNSIGNED)
                                break rel;
                        } else unreachable;
                        const target = Atom.parseRelocTarget(macho_file, .{
                            .object_id = object_id,
                            .rel = rel,
                            .code = it.data[offset..],
                            .base_offset = @as(i32, @intCast(offset)),
                        });
                        break :blk target;
                    },
                    .x86_64 => {
                        const target_address = record.getTargetSymbolAddress(.{
                            .base_addr = sect.addr,
                            .base_offset = offset,
                        });
                        const target_sym_index = self.getSymbolByAddress(target_address, null);
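                        // If the address resolves to a symbol that participates in global
                        // resolution, reference the canonical global; otherwise keep a
                        // local reference into this object file.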
                        const target = if (self.getGlobal(target_sym_index)) |global_index|
                            macho_file.globals.items[global_index]
                        else
                            SymbolWithLoc{ .sym_index = target_sym_index, .file = object_id + 1 };
                        break :blk target;
                    },
                    else => unreachable,
                }
            };
            if (target.getFile() != object_id) {
                log.debug("FDE at offset {x} marked DEAD", .{offset});
                self.eh_frame_relocs_lookup.getPtr(offset).?.dead = true;
            } else {
                // You would think that we are done, but it turns out that compilers may use
                // whichever symbol alias they want for a target symbol. This is particularly
                // problematic when using Zig's @export feature to re-export symbols under
                // additional names. For that reason, we need to ensure we record aliases here
                // too so that we can tie them to their matching unwind records and vice versa.
                const aliases = self.getSymbolAliases(target.sym_index);
                var i: u32 = 0;
                while (i < aliases.len) : (i += 1) {
                    const actual_target = SymbolWithLoc{
                        .sym_index = i + aliases.start,
                        .file = target.file,
                    };
                    log.debug("FDE at offset {x} tracks {s}", .{
                        offset,
                        macho_file.getSymbolName(actual_target),
                    });
                    try self.eh_frame_records_lookup.putNoClobber(gpa, actual_target, offset);
                }
            }
        }
    }
}

fn parseUnwindInfo(self: *Object, macho_file: *MachO, object_id: u32) !void {
    const gpa = macho_file.base.allocator;
    const cpu_arch = macho_file.base.options.target.cpu.arch;

    const sect_id = self.unwind_info_sect_id orelse {
        // If it so happens that the object had an `__eh_frame` section defined but no
        // `__compact_unwind`, we will try fully synthesising unwind info records to
        // somewhat match Apple ld's approach. However, we will only synthesise DWARF
        // records and nothing more. For this reason, we still create the output
        // `__TEXT,__unwind_info` section.
        if (self.hasEhFrameRecords()) {
            if (macho_file.unwind_info_section_index == null) {
                macho_file.unwind_info_section_index = try macho_file.initSection(
                    "__TEXT",
                    "__unwind_info",
                    .{},
                );
            }
        }
        return;
    };

    log.debug("parsing unwind info in {s}", .{self.name});

    if (macho_file.unwind_info_section_index == null) {
        macho_file.unwind_info_section_index = try macho_file.initSection("__TEXT", "__unwind_info", .{});
    }

    const unwind_records = self.getUnwindRecords();

    try self.unwind_records_lookup.ensureUnusedCapacity(gpa, @as(u32, @intCast(unwind_records.len)));

    const needs_eh_frame = for (unwind_records) |record| {
        if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) break true;
    } else false;

    if (needs_eh_frame and !self.hasEhFrameRecords()) return error.MissingEhFrameSection;

    try self.parseRelocs(gpa, sect_id);
    const relocs = self.getRelocs(sect_id);

    for (unwind_records, 0..) |record, record_id| {
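        // Compact unwind entries are fixed-size, so a record's offset within the
        // section is simply its index scaled by @sizeOf(macho.compact_unwind_entry).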
        const offset = record_id * @sizeOf(macho.compact_unwind_entry);
        const rel_pos = filterRelocs(
            relocs,
            offset,
            offset + @sizeOf(macho.compact_unwind_entry),
        );
        assert(rel_pos.len > 0); // TODO convert to an error as the unwind info is malformed
        self.unwind_relocs_lookup[record_id] = .{
            .dead = false,
            .reloc = rel_pos,
        };

        // Find function symbol that this record describes
        const rel = relocs[rel_pos.start..][rel_pos.len - 1];
        const target = Atom.parseRelocTarget(macho_file, .{
            .object_id = object_id,
            .rel = rel,
            .code = mem.asBytes(&record),
            .base_offset = @as(i32, @intCast(offset)),
        });
        if (target.getFile() != object_id) {
            log.debug("unwind record {d} marked DEAD", .{record_id});
            self.unwind_relocs_lookup[record_id].dead = true;
        } else {
            // You would think that we are done, but it turns out that compilers may use
            // whichever symbol alias they want for a target symbol. This is particularly
            // problematic when using Zig's @export feature to re-export symbols under
            // additional names. For that reason, we need to ensure we record aliases here
            // too so that we can tie them to their matching unwind records and vice versa.
            const aliases = self.getSymbolAliases(target.sym_index);
            var i: u32 = 0;
            while (i < aliases.len) : (i += 1) {
                const actual_target = SymbolWithLoc{
                    .sym_index = i + aliases.start,
                    .file = target.file,
                };
                log.debug("unwind record {d} tracks {s}", .{
                    record_id,
                    macho_file.getSymbolName(actual_target),
                });
                try self.unwind_records_lookup.putNoClobber(gpa, actual_target, @intCast(record_id));
            }
        }
    }
}

pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 {
    const symtab = self.in_symtab.?;
    if (index >= symtab.len) return null;
    const mapped_index = self.source_symtab_lookup[index];
    return symtab[mapped_index];
}

pub fn getSourceSection(self: Object, index: u8) macho.section_64 {
    const sections = self.getSourceSections();
    assert(index < sections.len);
    return sections[index];
}

pub fn getSourceSectionByName(self: Object, segname: []const u8, sectname: []const u8) ?macho.section_64 {
    const index = self.getSourceSectionIndexByName(segname, sectname) orelse return null;
    const sections = self.getSourceSections();
    return sections[index];
}

pub fn getSourceSectionIndexByName(self: Object, segname: []const u8, sectname: []const u8) ?u8 {
    const sections = self.getSourceSections();
    for (sections, 0..) |sect, i| {
        if (mem.eql(u8, segname, sect.segName()) and mem.eql(u8, sectname, sect.sectName()))
            return @as(u8, @intCast(i));
    } else return null;
}

pub fn getSourceSections(self: Object) []const macho.section_64 {
    var it = LoadCommandIterator{
        .ncmds = self.header.ncmds,
        .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
    };
    while (it.next()) |cmd| switch (cmd.cmd()) {
        .SEGMENT_64 => {
            return cmd.getSections();
        },
        else => {},
    } else unreachable;
}

pub fn parseDataInCode(self: *Object, gpa: Allocator) !void {
    var it = LoadCommandIterator{
        .ncmds = self.header.ncmds,
        .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
    };
    const cmd = while (it.next()) |cmd| {
        switch (cmd.cmd()) {
            .DATA_IN_CODE => break cmd.cast(macho.linkedit_data_command).?,
            else => {},
        }
    } else return;

    const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry));
    const dice = @as(
        [*]align(1) const macho.data_in_code_entry,
        @ptrCast(self.contents.ptr + cmd.dataoff),
    )[0..ndice];
    try self.data_in_code.ensureTotalCapacityPrecise(gpa, dice.len);
    self.data_in_code.appendUnalignedSliceAssumeCapacity(dice);
    mem.sort(macho.data_in_code_entry, self.data_in_code.items, {}, diceLessThan);
}

fn diceLessThan(ctx: void, lhs: macho.data_in_code_entry, rhs: macho.data_in_code_entry) bool {
    _ = ctx;
    return lhs.offset < rhs.offset;
}

fn getDysymtab(self: Object) ?macho.dysymtab_command {
    var it = LoadCommandIterator{
        .ncmds = self.header.ncmds,
        .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
    };
    while (it.next()) |cmd| {
        switch (cmd.cmd()) {
            .DYSYMTAB => return cmd.cast(macho.dysymtab_command).?,
            else => {},
        }
    } else return null;
}

pub fn parseDwarfInfo(self: Object) DwarfInfo {
    var di = DwarfInfo{
        .debug_info = &[0]u8{},
        .debug_abbrev = &[0]u8{},
        .debug_str = &[0]u8{},
    };
    for (self.getSourceSections()) |sect| {
        if (!sect.isDebug()) continue;
        const sectname = sect.sectName();
        if (mem.eql(u8, sectname, "__debug_info")) {
            di.debug_info = self.getSectionContents(sect);
        } else if (mem.eql(u8, sectname, "__debug_abbrev")) {
            di.debug_abbrev = self.getSectionContents(sect);
        } else if (mem.eql(u8, sectname, "__debug_str")) {
            di.debug_str = self.getSectionContents(sect);
        }
    }
    return di;
}

/// Returns Platform composed from the first encountered build version type load command:
/// either LC_BUILD_VERSION or LC_VERSION_MIN_*.
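/// Returns null if the object specifies no such load command.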
pub fn getPlatform(self: Object) ?Platform {
    var it = LoadCommandIterator{
        .ncmds = self.header.ncmds,
        .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
    };
    while (it.next()) |cmd| {
        switch (cmd.cmd()) {
            .BUILD_VERSION,
            .VERSION_MIN_MACOSX,
            .VERSION_MIN_IPHONEOS,
            .VERSION_MIN_TVOS,
            .VERSION_MIN_WATCHOS,
            => return Platform.fromLoadCommand(cmd),
            else => {},
        }
    } else return null;
}

pub fn getSectionContents(self: Object, sect: macho.section_64) []const u8 {
    const size = @as(usize, @intCast(sect.size));
    return self.contents[sect.offset..][0..size];
}

pub fn getSectionAliasSymbolIndex(self: Object, sect_id: u8) u32 {
    const start = @as(u32, @intCast(self.in_symtab.?.len));
    return start + sect_id;
}

pub fn getSectionAliasSymbol(self: *Object, sect_id: u8) macho.nlist_64 {
    return self.symtab[self.getSectionAliasSymbolIndex(sect_id)];
}

pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 {
    return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)];
}

fn getSourceRelocs(self: Object, sect: macho.section_64) ?[]align(1) const macho.relocation_info {
    if (sect.nreloc == 0) return null;
    return @as(
        [*]align(1) const macho.relocation_info,
        @ptrCast(self.contents.ptr + sect.reloff),
    )[0..sect.nreloc];
}

pub fn getRelocs(self: Object, sect_id: u8) []const macho.relocation_info {
    const sect = self.getSourceSection(sect_id);
    const start = self.section_relocs_lookup.items[sect_id];
    const len = sect.nreloc;
    return self.relocations.items[start..][0..len];
}

pub fn getSymbolName(self: Object, index: u32) []const u8 {
    const strtab = self.in_strtab.?;
    const sym = self.symtab[index];

    if (self.getSourceSymbol(index) == null) {
        assert(sym.n_strx == 0);
        return "";
    }

    const start = sym.n_strx;
    const len = self.strtab_lookup[index];

    return strtab[start..][0 .. len - 1 :0];
}

fn getSymbolAliases(self: Object, index: u32) Entry {
    const addr = self.source_address_lookup[index];
    var start = index;
    while (start > 0 and self.source_address_lookup[start - 1] == addr) : (start -= 1) {}
    const end: u32 = for (self.source_address_lookup[start..], start..) |saddr, i| {
        if (saddr != addr) break @as(u32, @intCast(i));
    } else @as(u32, @intCast(self.source_address_lookup.len));
    return .{ .start = start, .len = end - start };
}

pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 {
    // Find containing atom
    const Predicate = struct {
        addr: i64,

        pub fn predicate(pred: @This(), other: i64) bool {
            return if (other == -1) true else other > pred.addr;
        }
    };

    if (sect_hint) |sect_id| {
        if (self.source_section_index_lookup[sect_id].len > 0) {
            const lookup = self.source_section_index_lookup[sect_id];
            const target_sym_index = MachO.lsearch(
                i64,
                self.source_address_lookup[lookup.start..][0..lookup.len],
                Predicate{ .addr = @as(i64, @intCast(addr)) },
            );
            if (target_sym_index > 0) {
                // Hone in on the most senior alias of the target symbol.
                // See SymbolAtIndex.lessThan for more context.
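                // All aliases of a symbol share the same source address, and the sort
                // in parse() places the most senior alias first in the run, so
                // aliases.start selects it.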
                const aliases = self.getSymbolAliases(@intCast(lookup.start + target_sym_index - 1));
                return aliases.start;
            }
        }
        return self.getSectionAliasSymbolIndex(sect_id);
    }

    const target_sym_index = MachO.lsearch(i64, self.source_address_lookup, Predicate{
        .addr = @as(i64, @intCast(addr)),
    });
    assert(target_sym_index > 0);
    return @as(u32, @intCast(target_sym_index - 1));
}

pub fn getGlobal(self: Object, sym_index: u32) ?u32 {
    if (self.globals_lookup[sym_index] == -1) return null;
    return @as(u32, @intCast(self.globals_lookup[sym_index]));
}

pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?Atom.Index {
    return self.atom_by_index_table[sym_index];
}

pub fn hasUnwindRecords(self: Object) bool {
    return self.unwind_info_sect_id != null;
}

pub fn getUnwindRecords(self: Object) []align(1) const macho.compact_unwind_entry {
    const sect_id = self.unwind_info_sect_id orelse return &[0]macho.compact_unwind_entry{};
    const sect = self.getSourceSection(sect_id);
    const data = self.getSectionContents(sect);
    const num_entries = @divExact(data.len, @sizeOf(macho.compact_unwind_entry));
    return @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data))[0..num_entries];
}

pub fn hasEhFrameRecords(self: Object) bool {
    return self.eh_frame_sect_id != null;
}

pub fn getEhFrameRecordsIterator(self: Object) eh_frame.Iterator {
    const sect_id = self.eh_frame_sect_id orelse return .{ .data = &[0]u8{} };
    const sect = self.getSourceSection(sect_id);
    const data = self.getSectionContents(sect);
    return .{ .data = data };
}

pub fn hasDataInCode(self: Object) bool {
    return self.data_in_code.items.len > 0;
}

const Object = @This();

const std = @import("std");
const build_options = @import("build_options");
const assert = std.debug.assert;
const dwarf = std.dwarf;
const eh_frame = @import("eh_frame.zig");
const fs = std.fs;
const io = std.io;
const log = std.log.scoped(.link);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const sort = std.sort;
const trace = @import("../../tracy.zig").trace;

const Allocator = mem.Allocator;
const Atom = @import("Atom.zig");
const DwarfInfo = @import("DwarfInfo.zig");
const LoadCommandIterator = macho.LoadCommandIterator;
const MachO = @import("../MachO.zig");
const Platform = @import("load_commands.zig").Platform;
const SymbolWithLoc = MachO.SymbolWithLoc;
const UnwindInfo = @import("UnwindInfo.zig");
const Alignment = Atom.Alignment;