macho: implement object relinking in stage2

* In watch mode, changing the C source triggers a complete relink of objects, dylibs and archives (atoms coming from the incremental updates stay put, however). This means we need to undo the metadata populated when linking in objects, archives and dylibs.
* Remove the unused bit that split a section into atoms. This optimisation will probably be best rewritten from scratch once self-hosted matures, so the idea is parked for now. Also, for easier management of atoms spawned from the Object file, keep the atoms subgraph as part of the Object file struct.
* Remove the obsolete reference to static initializers in the Object struct.
* Implement handling of global symbol collisions in updateDeclExports.
author: Jakub Konka <kubkon@jakubkonka.com> 2021-09-15 16:57:10 +0200
committer: Jakub Konka <kubkon@jakubkonka.com> 2021-09-16 12:38:47 +0200
commit: 983d6dcd9ea75e05abd8ce2bd247bbad3960acd7 (patch)
tree: c5767d15a29bcc1e3704ca37e3dc0ddf54eca376 /src/link/MachO/Object.zig
parent: 506f24cac2f5226210f9ce505d5b93c47b7b8c87 (diff)
download: zig-983d6dcd9ea75e05abd8ce2bd247bbad3960acd7.tar.gz
zig-983d6dcd9ea75e05abd8ce2bd247bbad3960acd7.zip
1 files changed, 160 insertions, 249 deletions
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index 27da019be8..12c480b0f1 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -31,14 +31,12 @@ header: ?macho.mach_header_64 = null,
 load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
 
 segment_cmd_index: ?u16 = null,
+text_section_index: ?u16 = null,
 symtab_cmd_index: ?u16 = null,
 dysymtab_cmd_index: ?u16 = null,
 build_version_cmd_index: ?u16 = null,
 data_in_code_cmd_index: ?u16 = null,
 
-text_section_index: ?u16 = null,
-mod_init_func_section_index: ?u16 = null,
-
 // __DWARF segment sections
 dwarf_debug_info_index: ?u16 = null,
 dwarf_debug_abbrev_index: ?u16 = null,
@@ -56,7 +54,9 @@ tu_name: ?[]const u8 = null,
 tu_comp_dir: ?[]const u8 = null,
 mtime: ?u64 = null,
 
-atoms: std.ArrayListUnmanaged(*Atom) = .{},
+contained_atoms: std.ArrayListUnmanaged(*Atom) = .{},
+start_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{},
+end_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{},
 sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
 
 // TODO symbol mapping and its inverse can probably be simple arrays
@@ -138,12 +138,15 @@ pub fn deinit(self: *Object, allocator: *Allocator) void {
     self.data_in_code_entries.deinit(allocator);
     self.symtab.deinit(allocator);
     self.strtab.deinit(allocator);
-    self.atoms.deinit(allocator);
     self.sections_as_symbols.deinit(allocator);
     self.symbol_mapping.deinit(allocator);
     self.reverse_symbol_mapping.deinit(allocator);
     allocator.free(self.name);
 
+    self.contained_atoms.deinit(allocator);
+    self.start_atoms.deinit(allocator);
+    self.end_atoms.deinit(allocator);
+
     if (self.debug_info) |*db| {
         db.deinit(allocator);
     }
@@ -157,6 +160,67 @@ pub fn deinit(self: *Object, allocator: *Allocator) void {
     }
 }
 
+pub fn free(self: *Object, allocator: *Allocator, macho_file: *MachO) void {
+    log.debug("freeObject {*}", .{self});
+
+    var it = self.end_atoms.iterator();
+    while (it.next()) |entry| {
+        const match = entry.key_ptr.*;
+        const first_atom = self.start_atoms.get(match).?;
+        const last_atom = entry.value_ptr.*;
+        var atom = first_atom;
+
+        while (true) {
+            if (atom.local_sym_index != 0) {
+                macho_file.locals_free_list.append(allocator, atom.local_sym_index) catch {};
+                const local = &macho_file.locals.items[atom.local_sym_index];
+                local.n_type = 0;
+                atom.local_sym_index = 0;
+            }
+            if (atom == last_atom) {
+                break;
+            }
+            if (atom.next) |next| {
+                atom = next;
+            } else break;
+        }
+    }
+
+    self.freeAtoms(macho_file);
+}
+
+fn freeAtoms(self: *Object, macho_file: *MachO) void {
+    var it = self.end_atoms.iterator();
+    while (it.next()) |entry| {
+        const match = entry.key_ptr.*;
+        var first_atom: *Atom = self.start_atoms.get(match).?;
+        var last_atom: *Atom = entry.value_ptr.*;
+
+        if (macho_file.atoms.getPtr(match)) |atom_ptr| {
+            if (atom_ptr.* == last_atom) {
+                if (first_atom.prev) |prev| {
+                    // TODO shrink the section size here
+                    atom_ptr.* = prev;
+                } else {
+                    _ = macho_file.atoms.fetchRemove(match);
+                }
+            }
+        }
+
+        if (first_atom.prev) |prev| {
+            prev.next = last_atom.next;
+        } else {
+            first_atom.prev = null;
+        }
+
+        if (last_atom.next) |next| {
+            next.prev = last_atom.prev;
+        } else {
+            last_atom.next = null;
+        }
+    }
+}
+
 pub fn parse(self: *Object, allocator: *Allocator, target: std.Target) !void {
     const reader = self.file.reader();
     if (self.file_offset) |offset| {
@@ -226,10 +290,6 @@ pub fn readLoadCommands(self: *Object, allocator: *Allocator, reader: anytype) !
                         if (mem.eql(u8, sectname, "__text")) {
                             self.text_section_index = index;
                         }
-                    } else if (mem.eql(u8, segname, "__DATA")) {
-                        if (mem.eql(u8, sectname, "__mod_init_func")) {
-                            self.mod_init_func_section_index = index;
-                        }
                     }
 
                     sect.offset += offset;
@@ -320,7 +380,6 @@ const Context = struct {
     object: *Object,
     macho_file: *MachO,
     match: MachO.MatchingSection,
-    parsed_atoms: *ParsedAtoms,
 };
 
 const AtomParser = struct {
@@ -437,7 +496,6 @@ const AtomParser = struct {
             .allocator = context.allocator,
             .object = context.object,
             .macho_file = context.macho_file,
-            .parsed_atoms = context.parsed_atoms,
         });
 
         if (context.macho_file.has_dices) {
@@ -463,18 +521,10 @@ const AtomParser = struct {
     }
 };
 
-pub const ParsedAtoms = std.AutoHashMap(MachO.MatchingSection, *Atom);
-
-pub fn parseIntoAtoms(
-    self: *Object,
-    allocator: *Allocator,
-    object_id: u16,
-    macho_file: *MachO,
-) !ParsedAtoms {
+pub fn parseIntoAtoms(self: *Object, allocator: *Allocator, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    var parsed_atoms = ParsedAtoms.init(allocator);
     const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
 
     log.debug("analysing {s}", .{self.name});
@@ -540,16 +590,6 @@ pub fn parseIntoAtoms(
         // Symbols within this section only.
         const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);
 
-        // TODO rewrite and re-enable dead-code stripping optimisation. I think it might make sense
-        // to do this in a standalone pass after we parse the sections as atoms.
-        // In release mode, if the object file was generated with dead code stripping optimisations,
-        // note it now and parse sections as atoms.
-        // const is_splittable = blk: {
-        //     if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
-        //     break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
-        // };
-        const is_splittable = false;
-
         macho_file.has_dices = macho_file.has_dices or blk: {
             if (self.text_section_index) |index| {
                 if (index != id) break :blk false;
@@ -560,237 +600,108 @@ pub fn parseIntoAtoms(
         };
         macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;
 
-        next: {
-            if (is_splittable) atoms: {
-                if (filtered_nlists.len == 0) break :atoms;
-
-                // If the first nlist does not match the start of the section,
-                // then we need to encapsulate the memory range [section start, first symbol)
-                // as a temporary symbol and insert the matching Atom.
-                const first_nlist = filtered_nlists[0].nlist;
-                if (first_nlist.n_value > sect.addr) {
-                    const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
-                        self.name,
-                        segmentName(sect),
-                        sectionName(sect),
-                    });
-                    defer allocator.free(sym_name);
-
-                    const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
-                        const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
-                        try macho_file.locals.append(allocator, .{
-                            .n_strx = try macho_file.makeString(sym_name),
-                            .n_type = macho.N_SECT,
-                            .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
-                            .n_desc = 0,
-                            .n_value = 0,
-                        });
-                        try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
-                        break :blk atom_local_sym_index;
-                    };
-                    const atom_code = code[0 .. first_nlist.n_value - sect.addr];
-                    const atom_size = atom_code.len;
-                    const atom = try macho_file.createEmptyAtom(atom_local_sym_index, atom_size, sect.@"align");
-
-                    const is_zerofill = blk: {
-                        const section_type = commands.sectionType(sect);
-                        break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
-                    };
-                    if (!is_zerofill) {
-                        mem.copy(u8, atom.code.items, atom_code);
-                    }
-
-                    try atom.parseRelocs(relocs, .{
-                        .base_addr = sect.addr,
-                        .base_offset = 0,
-                        .allocator = allocator,
-                        .object = self,
-                        .macho_file = macho_file,
-                        .parsed_atoms = &parsed_atoms,
-                    });
-
-                    if (macho_file.has_dices) {
-                        const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + atom_size);
-                        try atom.dices.ensureTotalCapacity(allocator, dices.len);
-
-                        for (dices) |dice| {
-                            atom.dices.appendAssumeCapacity(.{
-                                .offset = dice.offset - try math.cast(u32, sect.addr),
-                                .length = dice.length,
-                                .kind = dice.kind,
-                            });
-                        }
-                    }
-
-                    if (parsed_atoms.getPtr(match)) |last| {
-                        last.*.next = atom;
-                        atom.prev = last.*;
-                        last.* = atom;
-                    } else {
-                        try parsed_atoms.putNoClobber(match, atom);
-                    }
-                    try self.atoms.append(allocator, atom);
-                }
-
-                var parser = AtomParser{
-                    .section = sect,
-                    .code = code,
-                    .relocs = relocs,
-                    .nlists = filtered_nlists,
-                };
-
-                while (try parser.next(.{
-                    .allocator = allocator,
-                    .object = self,
-                    .macho_file = macho_file,
-                    .match = match,
-                    .parsed_atoms = &parsed_atoms,
-                })) |atom| {
-                    const sym = macho_file.locals.items[atom.local_sym_index];
-                    const is_ext = blk: {
-                        const orig_sym_id = self.reverse_symbol_mapping.get(atom.local_sym_index) orelse unreachable;
-                        break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]);
-                    };
-                    if (is_ext) {
-                        if (macho_file.symbol_resolver.get(sym.n_strx)) |resolv| {
-                            assert(resolv.where == .global);
-                            if (resolv.file != object_id) {
-                                log.debug("deduping definition of {s} in {s}", .{
-                                    macho_file.getString(sym.n_strx),
-                                    self.name,
-                                });
-                                log.debug("  already defined in {s}", .{
-                                    macho_file.objects.items[resolv.file].name,
-                                });
-                                continue;
-                            }
-                        }
-                    }
+        // Since there is no symbol to refer to this atom, we create
+        // a temp one, unless we already did that when working out the relocations
+        // of other atoms.
+        const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
+            self.name,
+            segmentName(sect),
+            sectionName(sect),
+        });
+        defer allocator.free(sym_name);
+
+        const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
+            const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
+            try macho_file.locals.append(allocator, .{
+                .n_strx = try macho_file.makeString(sym_name),
+                .n_type = macho.N_SECT,
+                .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
+            break :blk atom_local_sym_index;
+        };
+        const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align");
 
-                    if (sym.n_value == sect.addr) {
-                        if (self.sections_as_symbols.get(sect_id)) |alias| {
-                            // In x86_64 relocs, it can so happen that the compiler refers to the same
-                            // atom by both the actual assigned symbol and the start of the section. In this
-                            // case, we need to link the two together so add an alias.
-                            try atom.aliases.append(allocator, alias);
-                        }
-                    }
+        const is_zerofill = blk: {
+            const section_type = commands.sectionType(sect);
+            break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
+        };
+        if (!is_zerofill) {
+            mem.copy(u8, atom.code.items, code);
+        }
 
-                    if (parsed_atoms.getPtr(match)) |last| {
-                        last.*.next = atom;
-                        atom.prev = last.*;
-                        last.* = atom;
-                    } else {
-                        try parsed_atoms.putNoClobber(match, atom);
-                    }
-                    try self.atoms.append(allocator, atom);
-                }
+        try atom.parseRelocs(relocs, .{
+            .base_addr = sect.addr,
+            .base_offset = 0,
+            .allocator = allocator,
+            .object = self,
+            .macho_file = macho_file,
+        });
 
-                break :next;
-            }
+        if (macho_file.has_dices) {
+            const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
+            try atom.dices.ensureTotalCapacity(allocator, dices.len);
 
-            // Since there is no symbol to refer to this atom, we create
-            // a temp one, unless we already did that when working out the relocations
-            // of other atoms.
-            const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
-                self.name,
-                segmentName(sect),
-                sectionName(sect),
-            });
-            defer allocator.free(sym_name);
-
-            const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
-                const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
-                try macho_file.locals.append(allocator, .{
-                    .n_strx = try macho_file.makeString(sym_name),
-                    .n_type = macho.N_SECT,
-                    .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
-                    .n_desc = 0,
-                    .n_value = 0,
+            for (dices) |dice| {
+                atom.dices.appendAssumeCapacity(.{
+                    .offset = dice.offset - try math.cast(u32, sect.addr),
+                    .length = dice.length,
+                    .kind = dice.kind,
                 });
-                try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
-                break :blk atom_local_sym_index;
-            };
-            const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align");
-
-            const is_zerofill = blk: {
-                const section_type = commands.sectionType(sect);
-                break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
-            };
-            if (!is_zerofill) {
-                mem.copy(u8, atom.code.items, code);
-            }
-
-            try atom.parseRelocs(relocs, .{
-                .base_addr = sect.addr,
-                .base_offset = 0,
-                .allocator = allocator,
-                .object = self,
-                .macho_file = macho_file,
-                .parsed_atoms = &parsed_atoms,
-            });
-
-            if (macho_file.has_dices) {
-                const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
-                try atom.dices.ensureTotalCapacity(allocator, dices.len);
-
-                for (dices) |dice| {
-                    atom.dices.appendAssumeCapacity(.{
-                        .offset = dice.offset - try math.cast(u32, sect.addr),
-                        .length = dice.length,
-                        .kind = dice.kind,
-                    });
-                }
             }
+        }
 
-            // Since this is atom gets a helper local temporary symbol that didn't exist
-            // in the object file which encompasses the entire section, we need traverse
-            // the filtered symbols and note which symbol is contained within so that
-            // we can properly allocate addresses down the line.
-            // While we're at it, we need to update segment,section mapping of each symbol too.
-            try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
-
-            for (filtered_nlists) |nlist_with_index| {
-                const nlist = nlist_with_index.nlist;
-                const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
-                const local = &macho_file.locals.items[local_sym_index];
-                local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
-
-                const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
-                    // TODO there has to be a better to handle this.
-                    for (di.inner.func_list.items) |func| {
-                        if (func.pc_range) |range| {
-                            if (nlist.n_value >= range.start and nlist.n_value < range.end) {
-                                break :blk Atom.Stab{
-                                    .function = range.end - range.start,
-                                };
-                            }
+        // Since this is atom gets a helper local temporary symbol that didn't exist
+        // in the object file which encompasses the entire section, we need traverse
+        // the filtered symbols and note which symbol is contained within so that
+        // we can properly allocate addresses down the line.
+        // While we're at it, we need to update segment,section mapping of each symbol too.
+        try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
+
+        for (filtered_nlists) |nlist_with_index| {
+            const nlist = nlist_with_index.nlist;
+            const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
+            const local = &macho_file.locals.items[local_sym_index];
+            local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
+
+            const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
+                // TODO there has to be a better to handle this.
+                for (di.inner.func_list.items) |func| {
+                    if (func.pc_range) |range| {
+                        if (nlist.n_value >= range.start and nlist.n_value < range.end) {
+                            break :blk Atom.Stab{
+                                .function = range.end - range.start,
+                            };
                         }
                     }
-                    // TODO
-                    // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
-                    break :blk .static;
-                } else null;
-
-                atom.contained.appendAssumeCapacity(.{
-                    .local_sym_index = local_sym_index,
-                    .offset = nlist.n_value - sect.addr,
-                    .stab = stab,
-                });
-            }
+                }
+                // TODO
+                // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
+                break :blk .static;
+            } else null;
+
+            atom.contained.appendAssumeCapacity(.{
+                .local_sym_index = local_sym_index,
+                .offset = nlist.n_value - sect.addr,
+                .stab = stab,
+            });
+        }
 
-            if (parsed_atoms.getPtr(match)) |last| {
-                last.*.next = atom;
-                atom.prev = last.*;
-                last.* = atom;
-            } else {
-                try parsed_atoms.putNoClobber(match, atom);
-            }
-            try self.atoms.append(allocator, atom);
+        if (!self.start_atoms.contains(match)) {
+            try self.start_atoms.putNoClobber(allocator, match, atom);
         }
-    }
 
-    return parsed_atoms;
+        if (self.end_atoms.getPtr(match)) |last| {
+            last.*.next = atom;
+            atom.prev = last.*;
+            last.* = atom;
+        } else {
+            try self.end_atoms.putNoClobber(allocator, match, atom);
+        }
+        try self.contained_atoms.append(allocator, atom);
+    }
 }
 
 fn parseSymtab(self: *Object, allocator: *Allocator) !void {
author	Jakub Konka <kubkon@jakubkonka.com>	2021-09-15 16:57:10 +0200
committer	Jakub Konka <kubkon@jakubkonka.com>	2021-09-16 12:38:47 +0200
commit	983d6dcd9ea75e05abd8ce2bd247bbad3960acd7 (patch)
tree	c5767d15a29bcc1e3704ca37e3dc0ddf54eca376 /src/link/MachO/Object.zig
parent	506f24cac2f5226210f9ce505d5b93c47b7b8c87 (diff)
download	zig-983d6dcd9ea75e05abd8ce2bd247bbad3960acd7.tar.gz zig-983d6dcd9ea75e05abd8ce2bd247bbad3960acd7.zip