1 files changed, 160 insertions, 249 deletions
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index 27da019be8..12c480b0f1 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -31,14 +31,12 @@ header: ?macho.mach_header_64 = null,
 load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
 
 segment_cmd_index: ?u16 = null,
+text_section_index: ?u16 = null,
 symtab_cmd_index: ?u16 = null,
 dysymtab_cmd_index: ?u16 = null,
 build_version_cmd_index: ?u16 = null,
 data_in_code_cmd_index: ?u16 = null,
 
-text_section_index: ?u16 = null,
-mod_init_func_section_index: ?u16 = null,
-
 // __DWARF segment sections
 dwarf_debug_info_index: ?u16 = null,
 dwarf_debug_abbrev_index: ?u16 = null,
@@ -56,7 +54,9 @@ tu_name: ?[]const u8 = null,
 tu_comp_dir: ?[]const u8 = null,
 mtime: ?u64 = null,
 
-atoms: std.ArrayListUnmanaged(*Atom) = .{},
+contained_atoms: std.ArrayListUnmanaged(*Atom) = .{},
+start_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{},
+end_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{},
 sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
 
 // TODO symbol mapping and its inverse can probably be simple arrays
@@ -138,12 +138,15 @@ pub fn deinit(self: *Object, allocator: *Allocator) void {
     self.data_in_code_entries.deinit(allocator);
     self.symtab.deinit(allocator);
     self.strtab.deinit(allocator);
-    self.atoms.deinit(allocator);
     self.sections_as_symbols.deinit(allocator);
     self.symbol_mapping.deinit(allocator);
     self.reverse_symbol_mapping.deinit(allocator);
     allocator.free(self.name);
 
+    self.contained_atoms.deinit(allocator);
+    self.start_atoms.deinit(allocator);
+    self.end_atoms.deinit(allocator);
+
     if (self.debug_info) |*db| {
         db.deinit(allocator);
     }
@@ -157,6 +160,67 @@ pub fn deinit(self: *Object, allocator: *Allocator) void {
     }
 }
 
+pub fn free(self: *Object, allocator: *Allocator, macho_file: *MachO) void {
+    log.debug("freeObject {*}", .{self});
+
+    var it = self.end_atoms.iterator();
+    while (it.next()) |entry| {
+        const match = entry.key_ptr.*;
+        const first_atom = self.start_atoms.get(match).?;
+        const last_atom = entry.value_ptr.*;
+        var atom = first_atom;
+
+        while (true) {
+            if (atom.local_sym_index != 0) {
+                macho_file.locals_free_list.append(allocator, atom.local_sym_index) catch {};
+                const local = &macho_file.locals.items[atom.local_sym_index];
+                local.n_type = 0;
+                atom.local_sym_index = 0;
+            }
+            if (atom == last_atom) {
+                break;
+            }
+            if (atom.next) |next| {
+                atom = next;
+            } else break;
+        }
+    }
+
+    self.freeAtoms(macho_file);
+}
+
+fn freeAtoms(self: *Object, macho_file: *MachO) void {
+    var it = self.end_atoms.iterator();
+    while (it.next()) |entry| {
+        const match = entry.key_ptr.*;
+        var first_atom: *Atom = self.start_atoms.get(match).?;
+        var last_atom: *Atom = entry.value_ptr.*;
+
+        if (macho_file.atoms.getPtr(match)) |atom_ptr| {
+            if (atom_ptr.* == last_atom) {
+                if (first_atom.prev) |prev| {
+                    // TODO shrink the section size here
+                    atom_ptr.* = prev;
+                } else {
+                    _ = macho_file.atoms.fetchRemove(match);
+                }
+            }
+        }
+
+        if (first_atom.prev) |prev| {
+            prev.next = last_atom.next;
+        } else {
+            first_atom.prev = null;
+        }
+
+        if (last_atom.next) |next| {
+            next.prev = last_atom.prev;
+        } else {
+            last_atom.next = null;
+        }
+    }
+}
+
 pub fn parse(self: *Object, allocator: *Allocator, target: std.Target) !void {
     const reader = self.file.reader();
     if (self.file_offset) |offset| {
@@ -226,10 +290,6 @@ pub fn readLoadCommands(self: *Object, allocator: *Allocator, reader: anytype) !
                         if (mem.eql(u8, sectname, "__text")) {
                             self.text_section_index = index;
                         }
-                    } else if (mem.eql(u8, segname, "__DATA")) {
-                        if (mem.eql(u8, sectname, "__mod_init_func")) {
-                            self.mod_init_func_section_index = index;
-                        }
                     }
 
                     sect.offset += offset;
@@ -320,7 +380,6 @@ const Context = struct {
     object: *Object,
     macho_file: *MachO,
     match: MachO.MatchingSection,
-    parsed_atoms: *ParsedAtoms,
 };
 
 const AtomParser = struct {
@@ -437,7 +496,6 @@ const AtomParser = struct {
             .allocator = context.allocator,
             .object = context.object,
             .macho_file = context.macho_file,
-            .parsed_atoms = context.parsed_atoms,
         });
 
         if (context.macho_file.has_dices) {
@@ -463,18 +521,10 @@ const AtomParser = struct {
     }
 };
 
-pub const ParsedAtoms = std.AutoHashMap(MachO.MatchingSection, *Atom);
-
-pub fn parseIntoAtoms(
-    self: *Object,
-    allocator: *Allocator,
-    object_id: u16,
-    macho_file: *MachO,
-) !ParsedAtoms {
+pub fn parseIntoAtoms(self: *Object, allocator: *Allocator, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    var parsed_atoms = ParsedAtoms.init(allocator);
     const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
 
     log.debug("analysing {s}", .{self.name});
@@ -540,16 +590,6 @@ pub fn parseIntoAtoms(
         // Symbols within this section only.
         const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);
 
-        // TODO rewrite and re-enable dead-code stripping optimisation. I think it might make sense
-        // to do this in a standalone pass after we parse the sections as atoms.
-        // In release mode, if the object file was generated with dead code stripping optimisations,
-        // note it now and parse sections as atoms.
-        // const is_splittable = blk: {
-        //     if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
-        //     break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
-        // };
-        const is_splittable = false;
-
         macho_file.has_dices = macho_file.has_dices or blk: {
             if (self.text_section_index) |index| {
                 if (index != id) break :blk false;
@@ -560,237 +600,108 @@ pub fn parseIntoAtoms(
         };
         macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;
 
-        next: {
-            if (is_splittable) atoms: {
-                if (filtered_nlists.len == 0) break :atoms;
-
-                // If the first nlist does not match the start of the section,
-                // then we need to encapsulate the memory range [section start, first symbol)
-                // as a temporary symbol and insert the matching Atom.
-                const first_nlist = filtered_nlists[0].nlist;
-                if (first_nlist.n_value > sect.addr) {
-                    const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
-                        self.name,
-                        segmentName(sect),
-                        sectionName(sect),
-                    });
-                    defer allocator.free(sym_name);
-
-                    const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
-                        const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
-                        try macho_file.locals.append(allocator, .{
-                            .n_strx = try macho_file.makeString(sym_name),
-                            .n_type = macho.N_SECT,
-                            .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
-                            .n_desc = 0,
-                            .n_value = 0,
-                        });
-                        try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
-                        break :blk atom_local_sym_index;
-                    };
-                    const atom_code = code[0 .. first_nlist.n_value - sect.addr];
-                    const atom_size = atom_code.len;
-                    const atom = try macho_file.createEmptyAtom(atom_local_sym_index, atom_size, sect.@"align");
-
-                    const is_zerofill = blk: {
-                        const section_type = commands.sectionType(sect);
-                        break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
-                    };
-                    if (!is_zerofill) {
-                        mem.copy(u8, atom.code.items, atom_code);
-                    }
-
-                    try atom.parseRelocs(relocs, .{
-                        .base_addr = sect.addr,
-                        .base_offset = 0,
-                        .allocator = allocator,
-                        .object = self,
-                        .macho_file = macho_file,
-                        .parsed_atoms = &parsed_atoms,
-                    });
-
-                    if (macho_file.has_dices) {
-                        const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + atom_size);
-                        try atom.dices.ensureTotalCapacity(allocator, dices.len);
-
-                        for (dices) |dice| {
-                            atom.dices.appendAssumeCapacity(.{
-                                .offset = dice.offset - try math.cast(u32, sect.addr),
-                                .length = dice.length,
-                                .kind = dice.kind,
-                            });
-                        }
-                    }
-
-                    if (parsed_atoms.getPtr(match)) |last| {
-                        last.*.next = atom;
-                        atom.prev = last.*;
-                        last.* = atom;
-                    } else {
-                        try parsed_atoms.putNoClobber(match, atom);
-                    }
-                    try self.atoms.append(allocator, atom);
-                }
-
-                var parser = AtomParser{
-                    .section = sect,
-                    .code = code,
-                    .relocs = relocs,
-                    .nlists = filtered_nlists,
-                };
-
-                while (try parser.next(.{
-                    .allocator = allocator,
-                    .object = self,
-                    .macho_file = macho_file,
-                    .match = match,
-                    .parsed_atoms = &parsed_atoms,
-                })) |atom| {
-                    const sym = macho_file.locals.items[atom.local_sym_index];
-                    const is_ext = blk: {
-                        const orig_sym_id = self.reverse_symbol_mapping.get(atom.local_sym_index) orelse unreachable;
-                        break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]);
-                    };
-                    if (is_ext) {
-                        if (macho_file.symbol_resolver.get(sym.n_strx)) |resolv| {
-                            assert(resolv.where == .global);
-                            if (resolv.file != object_id) {
-                                log.debug("deduping definition of {s} in {s}", .{
-                                    macho_file.getString(sym.n_strx),
-                                    self.name,
-                                });
-                                log.debug("  already defined in {s}", .{
-                                    macho_file.objects.items[resolv.file].name,
-                                });
-                                continue;
-                            }
-                        }
-                    }
+        // Since there is no symbol to refer to this atom, we create
+        // a temp one, unless we already did that when working out the relocations
+        // of other atoms.
+        const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
+            self.name,
+            segmentName(sect),
+            sectionName(sect),
+        });
+        defer allocator.free(sym_name);
+
+        const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
+            const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
+            try macho_file.locals.append(allocator, .{
+                .n_strx = try macho_file.makeString(sym_name),
+                .n_type = macho.N_SECT,
+                .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
+            break :blk atom_local_sym_index;
+        };
+        const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align");
 
-                    if (sym.n_value == sect.addr) {
-                        if (self.sections_as_symbols.get(sect_id)) |alias| {
-                            // In x86_64 relocs, it can so happen that the compiler refers to the same
-                            // atom by both the actual assigned symbol and the start of the section. In this
-                            // case, we need to link the two together so add an alias.
-                            try atom.aliases.append(allocator, alias);
-                        }
-                    }
+        const is_zerofill = blk: {
+            const section_type = commands.sectionType(sect);
+            break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
+        };
+        if (!is_zerofill) {
+            mem.copy(u8, atom.code.items, code);
+        }
 
-                    if (parsed_atoms.getPtr(match)) |last| {
-                        last.*.next = atom;
-                        atom.prev = last.*;
-                        last.* = atom;
-                    } else {
-                        try parsed_atoms.putNoClobber(match, atom);
-                    }
-                    try self.atoms.append(allocator, atom);
-                }
+        try atom.parseRelocs(relocs, .{
+            .base_addr = sect.addr,
+            .base_offset = 0,
+            .allocator = allocator,
+            .object = self,
+            .macho_file = macho_file,
+        });
 
-                break :next;
-            }
+        if (macho_file.has_dices) {
+            const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
+            try atom.dices.ensureTotalCapacity(allocator, dices.len);
 
-            // Since there is no symbol to refer to this atom, we create
-            // a temp one, unless we already did that when working out the relocations
-            // of other atoms.
-            const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
-                self.name,
-                segmentName(sect),
-                sectionName(sect),
-            });
-            defer allocator.free(sym_name);
-
-            const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
-                const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
-                try macho_file.locals.append(allocator, .{
-                    .n_strx = try macho_file.makeString(sym_name),
-                    .n_type = macho.N_SECT,
-                    .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
-                    .n_desc = 0,
-                    .n_value = 0,
+            for (dices) |dice| {
+                atom.dices.appendAssumeCapacity(.{
+                    .offset = dice.offset - try math.cast(u32, sect.addr),
+                    .length = dice.length,
+                    .kind = dice.kind,
                 });
-                try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
-                break :blk atom_local_sym_index;
-            };
-            const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align");
-
-            const is_zerofill = blk: {
-                const section_type = commands.sectionType(sect);
-                break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
-            };
-            if (!is_zerofill) {
-                mem.copy(u8, atom.code.items, code);
-            }
-
-            try atom.parseRelocs(relocs, .{
-                .base_addr = sect.addr,
-                .base_offset = 0,
-                .allocator = allocator,
-                .object = self,
-                .macho_file = macho_file,
-                .parsed_atoms = &parsed_atoms,
-            });
-
-            if (macho_file.has_dices) {
-                const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
-                try atom.dices.ensureTotalCapacity(allocator, dices.len);
-
-                for (dices) |dice| {
-                    atom.dices.appendAssumeCapacity(.{
-                        .offset = dice.offset - try math.cast(u32, sect.addr),
-                        .length = dice.length,
-                        .kind = dice.kind,
-                    });
-                }
             }
+        }
 
-            // Since this is atom gets a helper local temporary symbol that didn't exist
-            // in the object file which encompasses the entire section, we need traverse
-            // the filtered symbols and note which symbol is contained within so that
-            // we can properly allocate addresses down the line.
-            // While we're at it, we need to update segment,section mapping of each symbol too.
-            try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
-
-            for (filtered_nlists) |nlist_with_index| {
-                const nlist = nlist_with_index.nlist;
-                const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
-                const local = &macho_file.locals.items[local_sym_index];
-                local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
-
-                const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
-                    // TODO there has to be a better to handle this.
-                    for (di.inner.func_list.items) |func| {
-                        if (func.pc_range) |range| {
-                            if (nlist.n_value >= range.start and nlist.n_value < range.end) {
-                                break :blk Atom.Stab{
-                                    .function = range.end - range.start,
-                                };
-                            }
+        // Since this is atom gets a helper local temporary symbol that didn't exist
+        // in the object file which encompasses the entire section, we need traverse
+        // the filtered symbols and note which symbol is contained within so that
+        // we can properly allocate addresses down the line.
+        // While we're at it, we need to update segment,section mapping of each symbol too.
+        try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
+
+        for (filtered_nlists) |nlist_with_index| {
+            const nlist = nlist_with_index.nlist;
+            const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
+            const local = &macho_file.locals.items[local_sym_index];
+            local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
+
+            const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
+                // TODO there has to be a better to handle this.
+                for (di.inner.func_list.items) |func| {
+                    if (func.pc_range) |range| {
+                        if (nlist.n_value >= range.start and nlist.n_value < range.end) {
+                            break :blk Atom.Stab{
+                                .function = range.end - range.start,
+                            };
                         }
                     }
-                    // TODO
-                    // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
-                    break :blk .static;
-                } else null;
-
-                atom.contained.appendAssumeCapacity(.{
-                    .local_sym_index = local_sym_index,
-                    .offset = nlist.n_value - sect.addr,
-                    .stab = stab,
-                });
-            }
+                }
+                // TODO
+                // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
+                break :blk .static;
+            } else null;
+
+            atom.contained.appendAssumeCapacity(.{
+                .local_sym_index = local_sym_index,
+                .offset = nlist.n_value - sect.addr,
+                .stab = stab,
+            });
+        }
 
-            if (parsed_atoms.getPtr(match)) |last| {
-                last.*.next = atom;
-                atom.prev = last.*;
-                last.* = atom;
-            } else {
-                try parsed_atoms.putNoClobber(match, atom);
-            }
-            try self.atoms.append(allocator, atom);
+        if (!self.start_atoms.contains(match)) {
+            try self.start_atoms.putNoClobber(allocator, match, atom);
         }
-    }
 
-    return parsed_atoms;
+        if (self.end_atoms.getPtr(match)) |last| {
+            last.*.next = atom;
+            atom.prev = last.*;
+            last.* = atom;
+        } else {
+            try self.end_atoms.putNoClobber(allocator, match, atom);
+        }
+        try self.contained_atoms.append(allocator, atom);
+    }
 }
 
 fn parseSymtab(self: *Object, allocator: *Allocator) !void {