aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Jakub Konka <kubkon@jakubkonka.com> 2021-09-15 16:57:10 +0200
committer Jakub Konka <kubkon@jakubkonka.com> 2021-09-16 12:38:47 +0200
commit 983d6dcd9ea75e05abd8ce2bd247bbad3960acd7 (patch)
tree c5767d15a29bcc1e3704ca37e3dc0ddf54eca376 /src
parent 506f24cac2f5226210f9ce505d5b93c47b7b8c87 (diff)
download zig-983d6dcd9ea75e05abd8ce2bd247bbad3960acd7.tar.gz
zig-983d6dcd9ea75e05abd8ce2bd247bbad3960acd7.zip
macho: implement object relinking in stage2
* In watch mode, when changing the C source, we will trigger complete relinking of objects, dylibs and archives (atoms coming from the incremental updates stay put, however). This means we need to undo the metadata populated when linking in objects, archives and dylibs.
* Remove the unused bit that split sections into atoms. This optimisation will probably be best rewritten from scratch once self-hosted matures, so parking the idea for now. Also, for easier management of atoms spawned from the Object file, keep the atoms subgraph as part of the Object file struct.
* Remove the obsolete ref to static initializers in the object struct.
* Implement handling of global symbol collision in updateDeclExports.
Diffstat (limited to 'src')
-rw-r--r-- src/link/MachO.zig 184
-rw-r--r-- src/link/MachO/Atom.zig 102
-rw-r--r-- src/link/MachO/Object.zig 409
3 files changed, 368 insertions, 327 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 8037c5e9a0..324870a705 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -231,7 +231,7 @@ const SymbolWithLoc = struct {
},
where_index: u32,
local_sym_index: u32 = 0,
- file: u16 = 0,
+ file: ?u16 = null, // null means Zig module
};
pub const GotIndirectionKey = struct {
@@ -543,9 +543,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
.mode = link.determineMode(self.base.options),
});
try self.populateMissingMetadata();
-
- // TODO mimicking insertion of null symbol from incremental linker.
- // This will need to moved.
try self.locals.append(self.base.allocator, .{
.n_strx = 0,
.n_type = macho.N_UNDF,
@@ -557,13 +554,56 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
}
if (needs_full_relink) {
+ for (self.objects.items) |*object| {
+ object.free(self.base.allocator, self);
+ object.deinit(self.base.allocator);
+ }
self.objects.clearRetainingCapacity();
+
+ for (self.archives.items) |*archive| {
+ archive.deinit(self.base.allocator);
+ }
self.archives.clearRetainingCapacity();
+
+ for (self.dylibs.items) |*dylib| {
+ dylib.deinit(self.base.allocator);
+ }
self.dylibs.clearRetainingCapacity();
self.dylibs_map.clearRetainingCapacity();
self.referenced_dylibs.clearRetainingCapacity();
- // TODO figure out how to clear atoms from objects, etc.
+ {
+ var to_remove = std.ArrayList(u32).init(self.base.allocator);
+ defer to_remove.deinit();
+ var it = self.symbol_resolver.iterator();
+ while (it.next()) |entry| {
+ const key = entry.key_ptr.*;
+ const value = entry.value_ptr.*;
+ if (value.file != null) {
+ try to_remove.append(key);
+ }
+ }
+
+ for (to_remove.items) |key| {
+ if (self.symbol_resolver.fetchRemove(key)) |entry| {
+ const resolv = entry.value;
+ switch (resolv.where) {
+ .global => {
+ self.globals_free_list.append(self.base.allocator, resolv.where_index) catch {};
+ const sym = &self.globals.items[resolv.where_index];
+ sym.n_strx = 0;
+ sym.n_type = 0;
+ sym.n_value = 0;
+ },
+ .undef => {
+ const sym = &self.undefs.items[resolv.where_index];
+ sym.n_strx = 0;
+ sym.n_desc = 0;
+ },
+ }
+ }
+ }
+ }
// Positional arguments to the linker such as object files and static archives.
var positionals = std.ArrayList([]const u8).init(arena);
@@ -802,13 +842,35 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
try self.createDsoHandleAtom();
try self.addCodeSignatureLC();
+ // log.warn("locals:", .{});
+ // for (self.locals.items) |sym, id| {
+ // log.warn(" {d}: {s}: {}", .{ id, self.getString(sym.n_strx), sym });
+ // }
+ // log.warn("globals:", .{});
+ // for (self.globals.items) |sym, id| {
+ // log.warn(" {d}: {s}: {}", .{ id, self.getString(sym.n_strx), sym });
+ // }
+ // log.warn("undefs:", .{});
+ // for (self.undefs.items) |sym, id| {
+ // log.warn(" {d}: {s}: {}", .{ id, self.getString(sym.n_strx), sym });
+ // }
+ // {
+ // log.warn("resolver:", .{});
+ // var it = self.symbol_resolver.iterator();
+ // while (it.next()) |entry| {
+ // log.warn(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* });
+ // }
+ // }
+
for (self.unresolved.keys()) |index| {
const sym = self.undefs.items[index];
const sym_name = self.getString(sym.n_strx);
const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable;
log.err("undefined reference to symbol '{s}'", .{sym_name});
- log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name});
+ if (resolv.file) |file| {
+ log.err(" first referenced in '{s}'", .{self.objects.items[file].name});
+ }
}
if (self.unresolved.count() > 0) {
return error.UndefinedSymbolReference;
@@ -2349,7 +2411,9 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void {
!(symbolIsWeakDef(global.*) or symbolIsPext(global.*)))
{
log.err("symbol '{s}' defined multiple times", .{sym_name});
- log.err(" first definition in '{s}'", .{self.objects.items[resolv.file].name});
+ if (resolv.file) |file| {
+ log.err(" first definition in '{s}'", .{self.objects.items[file].name});
+ }
log.err(" next definition in '{s}'", .{object.name});
return error.MultipleSymbolDefinitions;
} else if (symbolIsWeakDef(sym) or symbolIsPext(sym)) continue; // Current symbol is weak, so skip it.
@@ -2632,10 +2696,10 @@ fn parseObjectsIntoAtoms(self: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
- var parsed_atoms = Object.ParsedAtoms.init(self.base.allocator);
+ var parsed_atoms = std.AutoArrayHashMap(MatchingSection, *Atom).init(self.base.allocator);
defer parsed_atoms.deinit();
- var first_atoms = Object.ParsedAtoms.init(self.base.allocator);
+ var first_atoms = std.AutoArrayHashMap(MatchingSection, *Atom).init(self.base.allocator);
defer first_atoms.deinit();
var section_metadata = std.AutoHashMap(MatchingSection, struct {
@@ -2644,13 +2708,12 @@ fn parseObjectsIntoAtoms(self: *MachO) !void {
}).init(self.base.allocator);
defer section_metadata.deinit();
- for (self.objects.items) |*object, object_id| {
+ for (self.objects.items) |*object| {
if (object.analyzed) continue;
- var atoms_in_objects = try object.parseIntoAtoms(self.base.allocator, @intCast(u16, object_id), self);
- defer atoms_in_objects.deinit();
+ try object.parseIntoAtoms(self.base.allocator, self);
- var it = atoms_in_objects.iterator();
+ var it = object.end_atoms.iterator();
while (it.next()) |entry| {
const match = entry.key_ptr.*;
const last_atom = entry.value_ptr.*;
@@ -3292,8 +3355,6 @@ pub fn updateDeclExports(
decl: *Module.Decl,
exports: []const *Module.Export,
) !void {
- // TODO If we are exporting with global linkage, check for already defined globals and flag
- // symbol duplicate/collision!
if (build_options.skip_non_native and builtin.object_format != .macho) {
@panic("Attempted to compile for object format that was disabled by build configuration");
}
@@ -3303,7 +3364,7 @@ pub fn updateDeclExports(
const tracy = trace(@src());
defer tracy.end();
- try self.globals.ensureCapacity(self.base.allocator, self.globals.items.len + exports.len);
+ try self.globals.ensureUnusedCapacity(self.base.allocator, exports.len);
if (decl.link.macho.local_sym_index == 0) return;
const decl_sym = &self.locals.items[decl.link.macho.local_sym_index];
@@ -3313,15 +3374,76 @@ pub fn updateDeclExports(
if (exp.options.section) |section_name| {
if (!mem.eql(u8, section_name, "__text")) {
- try module.failed_exports.ensureCapacity(module.gpa, module.failed_exports.count() + 1);
- module.failed_exports.putAssumeCapacityNoClobber(
+ try module.failed_exports.putNoClobber(
+ module.gpa,
exp,
- try Module.ErrorMsg.create(self.base.allocator, decl.srcLoc(), "Unimplemented: ExportOptions.section", .{}),
+ try Module.ErrorMsg.create(
+ self.base.allocator,
+ decl.srcLoc(),
+ "Unimplemented: ExportOptions.section",
+ .{},
+ ),
);
continue;
}
}
+ if (exp.options.linkage == .LinkOnce) {
+ try module.failed_exports.putNoClobber(
+ module.gpa,
+ exp,
+ try Module.ErrorMsg.create(
+ self.base.allocator,
+ decl.srcLoc(),
+ "Unimplemented: GlobalLinkage.LinkOnce",
+ .{},
+ ),
+ );
+ continue;
+ }
+
+ const is_weak = exp.options.linkage == .Internal or exp.options.linkage == .Weak;
+ const n_strx = try self.makeString(exp_name);
+ if (self.symbol_resolver.getPtr(n_strx)) |resolv| {
+ switch (resolv.where) {
+ .global => {
+ if (resolv.local_sym_index == decl.link.macho.local_sym_index) continue;
+
+ const sym = &self.globals.items[resolv.where_index];
+
+ if (symbolIsTentative(sym.*)) {
+ _ = self.tentatives.fetchSwapRemove(resolv.where_index);
+ } else if (!is_weak and !(symbolIsWeakDef(sym.*) or symbolIsPext(sym.*))) {
+ _ = try module.failed_exports.put(
+ module.gpa,
+ exp,
+ try Module.ErrorMsg.create(
+ self.base.allocator,
+ decl.srcLoc(),
+ \\LinkError: symbol '{s}' defined multiple times
+ \\ first definition in '{s}'
+ ,
+ .{ exp_name, self.objects.items[resolv.file.?].name },
+ ),
+ );
+ continue;
+ } else if (is_weak) continue; // Current symbol is weak, so skip it.
+
+ // Otherwise, update the resolver and the global symbol.
+ sym.n_type = macho.N_SECT | macho.N_EXT;
+ resolv.local_sym_index = decl.link.macho.local_sym_index;
+ resolv.file = null;
+ exp.link.macho.sym_index = resolv.where_index;
+
+ continue;
+ },
+ .undef => {
+ _ = self.unresolved.fetchSwapRemove(resolv.where_index);
+ _ = self.symbol_resolver.remove(n_strx);
+ },
+ }
+ }
+
var n_type: u8 = macho.N_SECT | macho.N_EXT;
var n_desc: u16 = 0;
@@ -3339,14 +3461,7 @@ pub fn updateDeclExports(
// Symbol's n_type is like for a symbol with strong linkage.
n_desc |= macho.N_WEAK_DEF;
},
- .LinkOnce => {
- try module.failed_exports.ensureCapacity(module.gpa, module.failed_exports.count() + 1);
- module.failed_exports.putAssumeCapacityNoClobber(
- exp,
- try Module.ErrorMsg.create(self.base.allocator, decl.srcLoc(), "Unimplemented: GlobalLinkage.LinkOnce", .{}),
- );
- continue;
- },
+ else => unreachable,
}
const global_sym_index = if (exp.link.macho.sym_index) |i| i else blk: {
@@ -3356,8 +3471,6 @@ pub fn updateDeclExports(
};
break :blk i;
};
-
- const n_strx = try self.makeString(exp_name);
const sym = &self.globals.items[global_sym_index];
sym.* = .{
.n_strx = try self.makeString(exp_name),
@@ -3368,12 +3481,11 @@ pub fn updateDeclExports(
};
exp.link.macho.sym_index = global_sym_index;
- const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, n_strx);
- resolv.value_ptr.* = .{
+ try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
.where = .global,
.where_index = global_sym_index,
.local_sym_index = decl.link.macho.local_sym_index,
- };
+ });
}
}
@@ -3381,8 +3493,11 @@ pub fn deleteExport(self: *MachO, exp: Export) void {
const sym_index = exp.sym_index orelse return;
self.globals_free_list.append(self.base.allocator, sym_index) catch {};
const global = &self.globals.items[sym_index];
- global.n_type = 0;
+ log.debug("deleting export '{s}': {}", .{ self.getString(global.n_strx), global });
assert(self.symbol_resolver.remove(global.n_strx));
+ global.n_type = 0;
+ global.n_strx = 0;
+ global.n_value = 0;
}
pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {
@@ -4403,6 +4518,7 @@ fn writeDyldInfoData(self: *MachO) !void {
const base_address = text_segment.inner.vmaddr;
for (self.globals.items) |sym| {
+ if (sym.n_type == 0) continue;
const sym_name = self.getString(sym.n_strx);
log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value });
@@ -4655,7 +4771,7 @@ fn writeSymbolTable(self: *MachO) !void {
.n_value = object.mtime orelse 0,
});
- for (object.atoms.items) |atom| {
+ for (object.contained_atoms.items) |atom| {
if (atom.stab) |stab| {
const nlists = try stab.asNlists(atom.local_sym_index, self);
defer self.base.allocator.free(nlists);
diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig
index 298855934e..6dbe853451 100644
--- a/src/link/MachO/Atom.zig
+++ b/src/link/MachO/Atom.zig
@@ -645,7 +645,6 @@ const RelocContext = struct {
allocator: *Allocator,
object: *Object,
macho_file: *MachO,
- parsed_atoms: *Object.ParsedAtoms,
};
fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation {
@@ -877,12 +876,16 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
.sect = context.macho_file.got_section_index.?,
};
- if (context.parsed_atoms.getPtr(match)) |last| {
+ if (!context.object.start_atoms.contains(match)) {
+ try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
+ }
+
+ if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
- try context.parsed_atoms.putNoClobber(match, atom);
+ try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
} else if (parsed_rel.payload == .unsigned) {
switch (parsed_rel.where) {
@@ -939,52 +942,63 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
if (parsed_rel.where != .undef) break :blk;
if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk;
- const stub_helper_atom = try context.macho_file.createStubHelperAtom();
- const laptr_atom = try context.macho_file.createLazyPointerAtom(
- stub_helper_atom.local_sym_index,
- parsed_rel.where_index,
- );
- const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index);
- try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom);
// TODO clean this up!
- if (context.parsed_atoms.getPtr(.{
- .seg = context.macho_file.text_segment_cmd_index.?,
- .sect = context.macho_file.stub_helper_section_index.?,
- })) |last| {
- last.*.next = stub_helper_atom;
- stub_helper_atom.prev = last.*;
- last.* = stub_helper_atom;
- } else {
- try context.parsed_atoms.putNoClobber(.{
+ const stub_helper_atom = atom: {
+ const atom = try context.macho_file.createStubHelperAtom();
+ const match = MachO.MatchingSection{
.seg = context.macho_file.text_segment_cmd_index.?,
.sect = context.macho_file.stub_helper_section_index.?,
- }, stub_helper_atom);
- }
- if (context.parsed_atoms.getPtr(.{
- .seg = context.macho_file.text_segment_cmd_index.?,
- .sect = context.macho_file.stubs_section_index.?,
- })) |last| {
- last.*.next = stub_atom;
- stub_atom.prev = last.*;
- last.* = stub_atom;
- } else {
- try context.parsed_atoms.putNoClobber(.{
- .seg = context.macho_file.text_segment_cmd_index.?,
- .sect = context.macho_file.stubs_section_index.?,
- }, stub_atom);
- }
- if (context.parsed_atoms.getPtr(.{
- .seg = context.macho_file.data_segment_cmd_index.?,
- .sect = context.macho_file.la_symbol_ptr_section_index.?,
- })) |last| {
- last.*.next = laptr_atom;
- laptr_atom.prev = last.*;
- last.* = laptr_atom;
- } else {
- try context.parsed_atoms.putNoClobber(.{
+ };
+ if (!context.object.start_atoms.contains(match)) {
+ try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
+ }
+ if (context.object.end_atoms.getPtr(match)) |last| {
+ last.*.next = atom;
+ atom.prev = last.*;
+ last.* = atom;
+ } else {
+ try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
+ }
+ break :atom atom;
+ };
+ const laptr_atom = atom: {
+ const atom = try context.macho_file.createLazyPointerAtom(
+ stub_helper_atom.local_sym_index,
+ parsed_rel.where_index,
+ );
+ const match = MachO.MatchingSection{
.seg = context.macho_file.data_segment_cmd_index.?,
.sect = context.macho_file.la_symbol_ptr_section_index.?,
- }, laptr_atom);
+ };
+ if (!context.object.start_atoms.contains(match)) {
+ try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
+ }
+ if (context.object.end_atoms.getPtr(match)) |last| {
+ last.*.next = atom;
+ atom.prev = last.*;
+ last.* = atom;
+ } else {
+ try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
+ }
+ break :atom atom;
+ };
+ {
+ const atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index);
+ const match = MachO.MatchingSection{
+ .seg = context.macho_file.text_segment_cmd_index.?,
+ .sect = context.macho_file.stubs_section_index.?,
+ };
+ if (!context.object.start_atoms.contains(match)) {
+ try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
+ }
+ if (context.object.end_atoms.getPtr(match)) |last| {
+ last.*.next = atom;
+ atom.prev = last.*;
+ last.* = atom;
+ } else {
+ try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
+ }
+ try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, atom);
}
}
}
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index 27da019be8..12c480b0f1 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -31,14 +31,12 @@ header: ?macho.mach_header_64 = null,
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
segment_cmd_index: ?u16 = null,
+text_section_index: ?u16 = null,
symtab_cmd_index: ?u16 = null,
dysymtab_cmd_index: ?u16 = null,
build_version_cmd_index: ?u16 = null,
data_in_code_cmd_index: ?u16 = null,
-text_section_index: ?u16 = null,
-mod_init_func_section_index: ?u16 = null,
-
// __DWARF segment sections
dwarf_debug_info_index: ?u16 = null,
dwarf_debug_abbrev_index: ?u16 = null,
@@ -56,7 +54,9 @@ tu_name: ?[]const u8 = null,
tu_comp_dir: ?[]const u8 = null,
mtime: ?u64 = null,
-atoms: std.ArrayListUnmanaged(*Atom) = .{},
+contained_atoms: std.ArrayListUnmanaged(*Atom) = .{},
+start_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{},
+end_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{},
sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
// TODO symbol mapping and its inverse can probably be simple arrays
@@ -138,12 +138,15 @@ pub fn deinit(self: *Object, allocator: *Allocator) void {
self.data_in_code_entries.deinit(allocator);
self.symtab.deinit(allocator);
self.strtab.deinit(allocator);
- self.atoms.deinit(allocator);
self.sections_as_symbols.deinit(allocator);
self.symbol_mapping.deinit(allocator);
self.reverse_symbol_mapping.deinit(allocator);
allocator.free(self.name);
+ self.contained_atoms.deinit(allocator);
+ self.start_atoms.deinit(allocator);
+ self.end_atoms.deinit(allocator);
+
if (self.debug_info) |*db| {
db.deinit(allocator);
}
@@ -157,6 +160,67 @@ pub fn deinit(self: *Object, allocator: *Allocator) void {
}
}
+/// Releases the local symbol slots held by the atoms this object contributed and
+/// then unlinks those atoms via `freeAtoms`.
+///
+/// For each section key in `end_atoms`, walks from the matching `start_atoms` entry
+/// up to and including the `end_atoms` entry. Every atom with a non-zero
+/// `local_sym_index` has that index pushed onto `macho_file.locals_free_list`, the
+/// corresponding local's `n_type` zeroed, and its own `local_sym_index` reset to 0.
+/// A failure to grow the free list is deliberately ignored (best effort).
+pub fn free(self: *Object, allocator: *Allocator, macho_file: *MachO) void {
+    log.debug("freeObject {*}", .{self});
+
+    var sections = self.end_atoms.iterator();
+    while (sections.next()) |section| {
+        const tail = section.value_ptr.*;
+        var cursor: ?*Atom = self.start_atoms.get(section.key_ptr.*).?;
+
+        // Stop after `tail`, or earlier if the chain ends before reaching it.
+        while (cursor) |current| {
+            const sym_index = current.local_sym_index;
+            if (sym_index != 0) {
+                macho_file.locals_free_list.append(allocator, sym_index) catch {};
+                macho_file.locals.items[sym_index].n_type = 0;
+                current.local_sym_index = 0;
+            }
+            cursor = if (current == tail) null else current.next;
+        }
+    }
+
+    self.freeAtoms(macho_file);
+}
+
+/// Splices this object's atoms out of the doubly linked atom chains.
+///
+/// For every section key in `end_atoms`, the atoms from `start_atoms[match]` to
+/// `end_atoms[match]` (inclusive) form one contiguous run. The run is removed by
+/// joining its outer neighbours to each other. If `macho_file.atoms` currently
+/// records the run's last atom for that section, the entry is moved back to the
+/// atom preceding the run, or removed when no such atom exists.
+fn freeAtoms(self: *Object, macho_file: *MachO) void {
+    var it = self.end_atoms.iterator();
+    while (it.next()) |entry| {
+        const match = entry.key_ptr.*;
+        const first_atom: *Atom = self.start_atoms.get(match).?;
+        const last_atom: *Atom = entry.value_ptr.*;
+
+        if (macho_file.atoms.getPtr(match)) |atom_ptr| {
+            if (atom_ptr.* == last_atom) {
+                if (first_atom.prev) |prev| {
+                    // TODO shrink the section size here
+                    atom_ptr.* = prev;
+                } else {
+                    _ = macho_file.atoms.fetchRemove(match);
+                }
+            }
+        }
+
+        // Bridge over the removed run [first_atom, last_atom].
+        if (first_atom.prev) |prev| {
+            prev.next = last_atom.next;
+        }
+        if (last_atom.next) |next| {
+            // The successor must point at whatever preceded the run. Using
+            // `last_atom.prev` would leave it pointing into the removed run
+            // whenever the run holds more than one atom.
+            next.prev = first_atom.prev;
+        }
+    }
+}
+
pub fn parse(self: *Object, allocator: *Allocator, target: std.Target) !void {
const reader = self.file.reader();
if (self.file_offset) |offset| {
@@ -226,10 +290,6 @@ pub fn readLoadCommands(self: *Object, allocator: *Allocator, reader: anytype) !
if (mem.eql(u8, sectname, "__text")) {
self.text_section_index = index;
}
- } else if (mem.eql(u8, segname, "__DATA")) {
- if (mem.eql(u8, sectname, "__mod_init_func")) {
- self.mod_init_func_section_index = index;
- }
}
sect.offset += offset;
@@ -320,7 +380,6 @@ const Context = struct {
object: *Object,
macho_file: *MachO,
match: MachO.MatchingSection,
- parsed_atoms: *ParsedAtoms,
};
const AtomParser = struct {
@@ -437,7 +496,6 @@ const AtomParser = struct {
.allocator = context.allocator,
.object = context.object,
.macho_file = context.macho_file,
- .parsed_atoms = context.parsed_atoms,
});
if (context.macho_file.has_dices) {
@@ -463,18 +521,10 @@ const AtomParser = struct {
}
};
-pub const ParsedAtoms = std.AutoHashMap(MachO.MatchingSection, *Atom);
-
-pub fn parseIntoAtoms(
- self: *Object,
- allocator: *Allocator,
- object_id: u16,
- macho_file: *MachO,
-) !ParsedAtoms {
+pub fn parseIntoAtoms(self: *Object, allocator: *Allocator, macho_file: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
- var parsed_atoms = ParsedAtoms.init(allocator);
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
log.debug("analysing {s}", .{self.name});
@@ -540,16 +590,6 @@ pub fn parseIntoAtoms(
// Symbols within this section only.
const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);
- // TODO rewrite and re-enable dead-code stripping optimisation. I think it might make sense
- // to do this in a standalone pass after we parse the sections as atoms.
- // In release mode, if the object file was generated with dead code stripping optimisations,
- // note it now and parse sections as atoms.
- // const is_splittable = blk: {
- // if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
- // break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
- // };
- const is_splittable = false;
-
macho_file.has_dices = macho_file.has_dices or blk: {
if (self.text_section_index) |index| {
if (index != id) break :blk false;
@@ -560,237 +600,108 @@ pub fn parseIntoAtoms(
};
macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;
- next: {
- if (is_splittable) atoms: {
- if (filtered_nlists.len == 0) break :atoms;
-
- // If the first nlist does not match the start of the section,
- // then we need to encapsulate the memory range [section start, first symbol)
- // as a temporary symbol and insert the matching Atom.
- const first_nlist = filtered_nlists[0].nlist;
- if (first_nlist.n_value > sect.addr) {
- const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
- self.name,
- segmentName(sect),
- sectionName(sect),
- });
- defer allocator.free(sym_name);
-
- const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
- const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
- try macho_file.locals.append(allocator, .{
- .n_strx = try macho_file.makeString(sym_name),
- .n_type = macho.N_SECT,
- .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
- .n_desc = 0,
- .n_value = 0,
- });
- try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
- break :blk atom_local_sym_index;
- };
- const atom_code = code[0 .. first_nlist.n_value - sect.addr];
- const atom_size = atom_code.len;
- const atom = try macho_file.createEmptyAtom(atom_local_sym_index, atom_size, sect.@"align");
-
- const is_zerofill = blk: {
- const section_type = commands.sectionType(sect);
- break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
- };
- if (!is_zerofill) {
- mem.copy(u8, atom.code.items, atom_code);
- }
-
- try atom.parseRelocs(relocs, .{
- .base_addr = sect.addr,
- .base_offset = 0,
- .allocator = allocator,
- .object = self,
- .macho_file = macho_file,
- .parsed_atoms = &parsed_atoms,
- });
-
- if (macho_file.has_dices) {
- const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + atom_size);
- try atom.dices.ensureTotalCapacity(allocator, dices.len);
-
- for (dices) |dice| {
- atom.dices.appendAssumeCapacity(.{
- .offset = dice.offset - try math.cast(u32, sect.addr),
- .length = dice.length,
- .kind = dice.kind,
- });
- }
- }
-
- if (parsed_atoms.getPtr(match)) |last| {
- last.*.next = atom;
- atom.prev = last.*;
- last.* = atom;
- } else {
- try parsed_atoms.putNoClobber(match, atom);
- }
- try self.atoms.append(allocator, atom);
- }
-
- var parser = AtomParser{
- .section = sect,
- .code = code,
- .relocs = relocs,
- .nlists = filtered_nlists,
- };
-
- while (try parser.next(.{
- .allocator = allocator,
- .object = self,
- .macho_file = macho_file,
- .match = match,
- .parsed_atoms = &parsed_atoms,
- })) |atom| {
- const sym = macho_file.locals.items[atom.local_sym_index];
- const is_ext = blk: {
- const orig_sym_id = self.reverse_symbol_mapping.get(atom.local_sym_index) orelse unreachable;
- break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]);
- };
- if (is_ext) {
- if (macho_file.symbol_resolver.get(sym.n_strx)) |resolv| {
- assert(resolv.where == .global);
- if (resolv.file != object_id) {
- log.debug("deduping definition of {s} in {s}", .{
- macho_file.getString(sym.n_strx),
- self.name,
- });
- log.debug(" already defined in {s}", .{
- macho_file.objects.items[resolv.file].name,
- });
- continue;
- }
- }
- }
+ // Since there is no symbol to refer to this atom, we create
+ // a temp one, unless we already did that when working out the relocations
+ // of other atoms.
+ const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
+ self.name,
+ segmentName(sect),
+ sectionName(sect),
+ });
+ defer allocator.free(sym_name);
+
+ const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
+ const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
+ try macho_file.locals.append(allocator, .{
+ .n_strx = try macho_file.makeString(sym_name),
+ .n_type = macho.N_SECT,
+ .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
+ .n_desc = 0,
+ .n_value = 0,
+ });
+ try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
+ break :blk atom_local_sym_index;
+ };
+ const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align");
- if (sym.n_value == sect.addr) {
- if (self.sections_as_symbols.get(sect_id)) |alias| {
- // In x86_64 relocs, it can so happen that the compiler refers to the same
- // atom by both the actual assigned symbol and the start of the section. In this
- // case, we need to link the two together so add an alias.
- try atom.aliases.append(allocator, alias);
- }
- }
+ const is_zerofill = blk: {
+ const section_type = commands.sectionType(sect);
+ break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
+ };
+ if (!is_zerofill) {
+ mem.copy(u8, atom.code.items, code);
+ }
- if (parsed_atoms.getPtr(match)) |last| {
- last.*.next = atom;
- atom.prev = last.*;
- last.* = atom;
- } else {
- try parsed_atoms.putNoClobber(match, atom);
- }
- try self.atoms.append(allocator, atom);
- }
+ try atom.parseRelocs(relocs, .{
+ .base_addr = sect.addr,
+ .base_offset = 0,
+ .allocator = allocator,
+ .object = self,
+ .macho_file = macho_file,
+ });
- break :next;
- }
+ if (macho_file.has_dices) {
+ const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
+ try atom.dices.ensureTotalCapacity(allocator, dices.len);
- // Since there is no symbol to refer to this atom, we create
- // a temp one, unless we already did that when working out the relocations
- // of other atoms.
- const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
- self.name,
- segmentName(sect),
- sectionName(sect),
- });
- defer allocator.free(sym_name);
-
- const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
- const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
- try macho_file.locals.append(allocator, .{
- .n_strx = try macho_file.makeString(sym_name),
- .n_type = macho.N_SECT,
- .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
- .n_desc = 0,
- .n_value = 0,
+ for (dices) |dice| {
+ atom.dices.appendAssumeCapacity(.{
+ .offset = dice.offset - try math.cast(u32, sect.addr),
+ .length = dice.length,
+ .kind = dice.kind,
});
- try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
- break :blk atom_local_sym_index;
- };
- const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align");
-
- const is_zerofill = blk: {
- const section_type = commands.sectionType(sect);
- break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
- };
- if (!is_zerofill) {
- mem.copy(u8, atom.code.items, code);
- }
-
- try atom.parseRelocs(relocs, .{
- .base_addr = sect.addr,
- .base_offset = 0,
- .allocator = allocator,
- .object = self,
- .macho_file = macho_file,
- .parsed_atoms = &parsed_atoms,
- });
-
- if (macho_file.has_dices) {
- const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
- try atom.dices.ensureTotalCapacity(allocator, dices.len);
-
- for (dices) |dice| {
- atom.dices.appendAssumeCapacity(.{
- .offset = dice.offset - try math.cast(u32, sect.addr),
- .length = dice.length,
- .kind = dice.kind,
- });
- }
}
+ }
- // Since this is atom gets a helper local temporary symbol that didn't exist
- // in the object file which encompasses the entire section, we need traverse
- // the filtered symbols and note which symbol is contained within so that
- // we can properly allocate addresses down the line.
- // While we're at it, we need to update segment,section mapping of each symbol too.
- try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
-
- for (filtered_nlists) |nlist_with_index| {
- const nlist = nlist_with_index.nlist;
- const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
- const local = &macho_file.locals.items[local_sym_index];
- local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
-
- const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
- // TODO there has to be a better to handle this.
- for (di.inner.func_list.items) |func| {
- if (func.pc_range) |range| {
- if (nlist.n_value >= range.start and nlist.n_value < range.end) {
- break :blk Atom.Stab{
- .function = range.end - range.start,
- };
- }
+ // Since this atom gets a helper local temporary symbol that didn't exist
+ // in the object file which encompasses the entire section, we need to traverse
+ // the filtered symbols and note which symbol is contained within so that
+ // we can properly allocate addresses down the line.
+ // While we're at it, we need to update segment,section mapping of each symbol too.
+ try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
+
+ for (filtered_nlists) |nlist_with_index| {
+ const nlist = nlist_with_index.nlist;
+ const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable;
+ const local = &macho_file.locals.items[local_sym_index];
+ local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
+
+ const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
+ // TODO there has to be a better way to handle this.
+ for (di.inner.func_list.items) |func| {
+ if (func.pc_range) |range| {
+ if (nlist.n_value >= range.start and nlist.n_value < range.end) {
+ break :blk Atom.Stab{
+ .function = range.end - range.start,
+ };
}
}
- // TODO
- // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
- break :blk .static;
- } else null;
-
- atom.contained.appendAssumeCapacity(.{
- .local_sym_index = local_sym_index,
- .offset = nlist.n_value - sect.addr,
- .stab = stab,
- });
- }
+ }
+ // TODO
+ // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global;
+ break :blk .static;
+ } else null;
+
+ atom.contained.appendAssumeCapacity(.{
+ .local_sym_index = local_sym_index,
+ .offset = nlist.n_value - sect.addr,
+ .stab = stab,
+ });
+ }
- if (parsed_atoms.getPtr(match)) |last| {
- last.*.next = atom;
- atom.prev = last.*;
- last.* = atom;
- } else {
- try parsed_atoms.putNoClobber(match, atom);
- }
- try self.atoms.append(allocator, atom);
+ if (!self.start_atoms.contains(match)) {
+ try self.start_atoms.putNoClobber(allocator, match, atom);
}
- }
- return parsed_atoms;
+ if (self.end_atoms.getPtr(match)) |last| {
+ last.*.next = atom;
+ atom.prev = last.*;
+ last.* = atom;
+ } else {
+ try self.end_atoms.putNoClobber(allocator, match, atom);
+ }
+ try self.contained_atoms.append(allocator, atom);
+ }
}
fn parseSymtab(self: *Object, allocator: *Allocator) !void {