aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Jakub Konka <kubkon@jakubkonka.com> 2021-10-14 13:50:10 +0200
committer Jakub Konka <kubkon@jakubkonka.com> 2021-10-22 12:50:25 +0200
commit d0dceae736edb43d4c217306a2b0445277f184ce (patch)
tree d6bad107975e61e360a50de5436fb297a97435fb /src
parent 912e7dc54b9b49d96123ffd398e6d40b455997fe (diff)
download zig-d0dceae736edb43d4c217306a2b0445277f184ce.tar.gz
zig-d0dceae736edb43d4c217306a2b0445277f184ce.zip
macho: dump linker's state as JSON
The elements of the output JSON are ordered by nondecreasing VM address in the generated binary (some elements may occupy the same VM address; for example, an atom and a relocation might coincide in the address space). The generated JSON can be inspected manually or via a preview tool, `zig-snapshots`, that I am currently working on. The tool will allow the user to interactively inspect the state of the linker together with the positioning of sections, symbols, atoms and relocations within each snapshot state and, in the future, between snapshots too. This should allow for quicker debugging of the linker, which is nontrivial when run in incremental mode. Note that the state will only be dumped if the compiler is built with the `-Dlink-snapshot` flag on, and the compiler is then passed the `--debug-link-snapshot` flag when compiling a source file or project.
Diffstat (limited to 'src')
-rw-r--r-- src/Compilation.zig 3
-rw-r--r-- src/config.zig.in 1
-rw-r--r-- src/link.zig 3
-rw-r--r-- src/link/MachO.zig 296
-rw-r--r-- src/link/MachO/Atom.zig 8
-rw-r--r-- src/link/MachO/Object.zig 16
-rw-r--r-- src/main.zig 9
7 files changed, 319 insertions, 17 deletions
diff --git a/src/Compilation.zig b/src/Compilation.zig
index 63a6b50d5b..a7a76633ab 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -757,6 +757,8 @@ pub const InitOptions = struct {
subsystem: ?std.Target.SubSystem = null,
/// WASI-only. Type of WASI execution model ("command" or "reactor").
wasi_exec_model: ?std.builtin.WasiExecModel = null,
+ /// (Zig compiler development) Enable dumping linker's state as JSON.
+ enable_link_snapshots: bool = false,
};
fn addPackageTableToCacheHash(
@@ -1438,6 +1440,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation {
.is_test = options.is_test,
.wasi_exec_model = wasi_exec_model,
.use_stage1 = use_stage1,
+ .enable_link_snapshots = options.enable_link_snapshots,
});
errdefer bin_file.destroy();
comp.* = .{
diff --git a/src/config.zig.in b/src/config.zig.in
index 62e8785ccb..f193fddb20 100644
--- a/src/config.zig.in
+++ b/src/config.zig.in
@@ -6,6 +6,7 @@ pub const llvm_has_arc = false;
pub const version: [:0]const u8 = "@ZIG_VERSION@";
pub const semver = @import("std").SemanticVersion.parse(version) catch unreachable;
pub const enable_logging: bool = @ZIG_ENABLE_LOGGING_BOOL@;
+pub const enable_link_snapshots: bool = false;
pub const enable_tracy = false;
pub const is_stage1 = true;
pub const skip_non_native = false;
diff --git a/src/link.zig b/src/link.zig
index 5874ed5703..8cb2c4a485 100644
--- a/src/link.zig
+++ b/src/link.zig
@@ -126,6 +126,9 @@ pub const Options = struct {
/// WASI-only. Type of WASI execution model ("command" or "reactor").
wasi_exec_model: std.builtin.WasiExecModel = undefined,
+ /// (Zig compiler development) Enable dumping of linker's state as JSON.
+ enable_link_snapshots: bool = false,
+
pub fn effectiveOutputMode(options: Options) std.builtin.OutputMode {
return if (options.use_lld) .Obj else options.output_mode;
}
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 8d951fe878..923811af36 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -938,6 +938,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
if (self.requires_adhoc_codesig) {
try self.writeCodeSignature(); // code signing always comes last
}
+
+ if (build_options.enable_link_snapshots) {
+ if (self.base.options.enable_link_snapshots)
+ try self.snapshotState();
+ }
}
cache: {
@@ -2424,6 +2429,14 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void {
continue;
},
.undef => {
+ const undef = &self.undefs.items[resolv.where_index];
+ undef.* = .{
+ .n_strx = 0,
+ .n_type = macho.N_UNDF,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = 0,
+ };
_ = self.unresolved.fetchSwapRemove(resolv.where_index);
},
}
@@ -4826,9 +4839,17 @@ fn writeSymbolTable(self: *MachO) !void {
}
}
+ var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator);
+ defer undefs.deinit();
+
+ for (self.undefs.items) |sym| {
+ if (sym.n_strx == 0) continue;
+ try undefs.append(sym);
+ }
+
const nlocals = locals.items.len;
const nexports = self.globals.items.len;
- const nundefs = self.undefs.items.len;
+ const nundefs = undefs.items.len;
const locals_off = symtab.symoff;
const locals_size = nlocals * @sizeOf(macho.nlist_64);
@@ -4843,7 +4864,7 @@ fn writeSymbolTable(self: *MachO) !void {
const undefs_off = exports_off + exports_size;
const undefs_size = nundefs * @sizeOf(macho.nlist_64);
log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off });
- try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off);
+ try self.base.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off);
symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs);
seg.inner.filesize += locals_size + exports_size + undefs_size;
@@ -5188,3 +5209,274 @@ pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anyty
}
return i;
}
+
+/// Appends a JSON snapshot of the linker's current layout to `snapshots.json` in the
+/// emit directory. Returns early (after a debug log) when `self.base.options.emit` is null.
+///
+/// A snapshot consists of a nanosecond timestamp and a flat list of nodes. Sections are
+/// visited in `section_ordinals` order; within a section the atoms are walked from the
+/// first one (found by following `prev` links) along the `next` links. Every section and
+/// every atom contributes a start node and an end node; an atom's relocations and its
+/// contained symbols are emitted between the atom's start and end nodes.
+///
+/// On disk the file holds a JSON array of snapshots. Every call finishes by writing `]`,
+/// and the following call overwrites that last byte with `,` before appending its own
+/// snapshot. The file is truncated only when `self.cold_start` is set.
+///
+/// Errors from file creation, seeking, writing and arena allocation are propagated.
+fn snapshotState(self: *MachO) !void {
+    const emit = self.base.options.emit orelse {
+        log.debug("no emit directory found; skipping snapshot...", .{});
+        return;
+    };
+
+    const Snapshot = struct {
+        const Node = struct {
+            const Tag = enum {
+                section_start,
+                section_end,
+                atom_start,
+                atom_end,
+                relocation,
+
+                /// Serializes the tag as a quoted JSON string carrying the tag's name.
+                pub fn jsonStringify(
+                    tag: Tag,
+                    options: std.json.StringifyOptions,
+                    out_stream: anytype,
+                ) !void {
+                    _ = options;
+                    switch (tag) {
+                        .section_start => try out_stream.writeAll("\"section_start\""),
+                        .section_end => try out_stream.writeAll("\"section_end\""),
+                        .atom_start => try out_stream.writeAll("\"atom_start\""),
+                        .atom_end => try out_stream.writeAll("\"atom_end\""),
+                        .relocation => try out_stream.writeAll("\"relocation\""),
+                    }
+                }
+            };
+            /// Tag-dependent data; fields a given tag does not use keep their defaults.
+            const Payload = struct {
+                name: []const u8 = "",
+                aliases: [][]const u8 = &[0][]const u8{},
+                is_global: bool = false,
+                target: u64 = 0,
+            };
+            address: u64,
+            tag: Tag,
+            payload: Payload,
+        };
+        timestamp: i128,
+        nodes: []Node,
+    };
+
+    // Everything allocated while building the snapshot lives in this arena and is
+    // released in one go when the function returns.
+    var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator);
+    defer arena_allocator.deinit();
+    const arena = &arena_allocator.allocator;
+
+    const out_file = try emit.directory.handle.createFile("snapshots.json", .{
+        .truncate = self.cold_start,
+        .read = true,
+    });
+    defer out_file.close();
+
+    // Position on the last byte of the file and overwrite it with ',' so that the new
+    // snapshot extends the existing array. If the seek fails with `error.Unseekable`,
+    // open a new array with '[' instead.
+    // NOTE(review): presumably `error.Unseekable` is what seeking to -1 from the end of
+    // an empty (or freshly truncated) file reports - confirm against std.os.
+    if (out_file.seekFromEnd(-1)) {
+        try out_file.writer().writeByte(',');
+    } else |err| switch (err) {
+        error.Unseekable => try out_file.writer().writeByte('['),
+        else => |e| return e,
+    }
+    var writer = out_file.writer();
+
+    var snapshot = Snapshot{
+        .timestamp = std.time.nanoTimestamp(),
+        .nodes = undefined, // filled in once all nodes have been collected
+    };
+    var nodes = std.ArrayList(Snapshot.Node).init(arena);
+
+    for (self.section_ordinals.keys()) |key| {
+        const seg = self.load_commands.items[key.seg].Segment;
+        const sect = seg.sections.items[key.sect];
+        const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{
+            commands.segmentName(sect),
+            commands.sectionName(sect),
+        });
+        try nodes.append(.{
+            .address = sect.addr,
+            .tag = .section_start,
+            .payload = .{ .name = sect_name },
+        });
+
+        // A section without any atom is emitted as a bare start/end pair.
+        var atom: *Atom = self.atoms.get(key) orelse {
+            try nodes.append(.{
+                .address = sect.addr + sect.size,
+                .tag = .section_end,
+                .payload = .{},
+            });
+            continue;
+        };
+
+        // Rewind to the first atom of the section so atoms are visited front to back.
+        while (atom.prev) |prev| {
+            atom = prev;
+        }
+
+        while (true) {
+            const atom_sym = self.locals.items[atom.local_sym_index];
+            var node = Snapshot.Node{
+                .address = atom_sym.n_value,
+                .tag = .atom_start,
+                .payload = .{
+                    .name = self.getString(atom_sym.n_strx),
+                    .is_global = self.symbol_resolver.contains(atom_sym.n_strx),
+                },
+            };
+
+            var aliases = std.ArrayList([]const u8).init(arena);
+            for (atom.aliases.items) |loc| {
+                try aliases.append(self.getString(self.locals.items[loc].n_strx));
+            }
+            node.payload.aliases = aliases.toOwnedSlice();
+            try nodes.append(node);
+
+            // Build one relocation node per relocation of this atom. They are staged in
+            // a separate list because, for atoms with contained symbols, they have to be
+            // interleaved with the contained symbols' nodes below.
+            var relocs = std.ArrayList(Snapshot.Node).init(arena);
+            try relocs.ensureTotalCapacity(atom.relocs.items.len);
+            for (atom.relocs.items) |rel| {
+                const arch = self.base.options.target.cpu.arch;
+                // Address of the relocation site: the atom's address plus the offset.
+                const source_addr = atom_sym.n_value + rel.offset;
+                const target_addr = blk: {
+                    const is_via_got = got: {
+                        switch (arch) {
+                            .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) {
+                                .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => true,
+                                else => false,
+                            },
+                            .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) {
+                                .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true,
+                                else => false,
+                            },
+                            else => unreachable,
+                        }
+                    };
+
+                    // GOT relocations are reported as targeting the GOT entry's atom.
+                    if (is_via_got) {
+                        const got_atom = self.got_entries_map.get(rel.target).?;
+                        break :blk self.locals.items[got_atom.local_sym_index].n_value;
+                    }
+
+                    switch (rel.target) {
+                        .local => |sym_index| {
+                            const sym = self.locals.items[sym_index];
+                            // The relocation is treated as thread-local when the *source*
+                            // atom lives in a thread-local-variables section.
+                            const is_tlv = is_tlv: {
+                                const source_sym = self.locals.items[atom.local_sym_index];
+                                const match = self.section_ordinals.keys()[source_sym.n_sect - 1];
+                                const match_seg = self.load_commands.items[match.seg].Segment;
+                                const match_sect = match_seg.sections.items[match.sect];
+                                break :is_tlv commands.sectionType(match_sect) == macho.S_THREAD_LOCAL_VARIABLES;
+                            };
+                            if (is_tlv) {
+                                // Thread-local targets are reported relative to the start
+                                // of the TLV data section, or the TLV bss section if there
+                                // is no data section.
+                                const match_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+                                const base_address = inner: {
+                                    if (self.tlv_data_section_index) |i| {
+                                        break :inner match_seg.sections.items[i].addr;
+                                    } else if (self.tlv_bss_section_index) |i| {
+                                        break :inner match_seg.sections.items[i].addr;
+                                    } else unreachable;
+                                };
+                                break :blk sym.n_value - base_address;
+                            }
+                            break :blk sym.n_value;
+                        },
+                        .global => |n_strx| {
+                            const resolv = self.symbol_resolver.get(n_strx).?;
+                            switch (resolv.where) {
+                                .global => break :blk self.globals.items[resolv.where_index].n_value,
+                                .undef => {
+                                    // Undefined symbols resolve to their stub atom when
+                                    // one exists, and to address 0 otherwise.
+                                    break :blk if (self.stubs_map.get(n_strx)) |stub_atom|
+                                        self.locals.items[stub_atom.local_sym_index].n_value
+                                    else
+                                        0;
+                                },
+                            }
+                        },
+                    }
+                };
+
+                relocs.appendAssumeCapacity(.{
+                    .address = source_addr,
+                    .tag = .relocation,
+                    .payload = .{ .target = target_addr },
+                });
+            }
+
+            if (atom.contained.items.len == 0) {
+                try nodes.appendSlice(relocs.items);
+            } else {
+                // Relocations of relocatable sources come in reverse order by default.
+                // This is irrelevant for linking, but the nodes are laid out by address
+                // for display, so flip them back.
+                std.mem.reverse(Snapshot.Node, relocs.items);
+
+                var next_i: usize = 0;
+                // Index into `relocs.items` of the first relocation not yet emitted.
+                var last_rel: usize = 0;
+                while (next_i < atom.contained.items.len) : (next_i += 1) {
+                    const loc = atom.contained.items[next_i];
+                    const cont_sym = self.locals.items[loc.local_sym_index];
+                    const cont_sym_name = self.getString(cont_sym.n_strx);
+                    var contained_node = Snapshot.Node{
+                        .address = cont_sym.n_value,
+                        .tag = .atom_start,
+                        .payload = .{
+                            .name = cont_sym_name,
+                            .is_global = self.symbol_resolver.contains(cont_sym.n_strx),
+                        },
+                    };
+
+                    // Accumulate aliases: following contained symbols that share this
+                    // symbol's address are folded into this node. A symbol known to the
+                    // symbol resolver takes over as the node's name and the previous
+                    // name is demoted to an alias.
+                    var inner_aliases = std.ArrayList([]const u8).init(arena);
+                    while (true) {
+                        if (next_i + 1 >= atom.contained.items.len) break;
+                        const next_sym = self.locals.items[atom.contained.items[next_i + 1].local_sym_index];
+                        if (next_sym.n_value != cont_sym.n_value) break;
+                        const next_sym_name = self.getString(next_sym.n_strx);
+                        if (self.symbol_resolver.contains(next_sym.n_strx)) {
+                            try inner_aliases.append(contained_node.payload.name);
+                            contained_node.payload.name = next_sym_name;
+                            contained_node.payload.is_global = true;
+                        } else try inner_aliases.append(next_sym_name);
+                        next_i += 1;
+                    }
+
+                    // The contained symbol extends up to the next contained symbol or,
+                    // for the last one, up to the end of the enclosing atom.
+                    const cont_size = if (next_i + 1 < atom.contained.items.len)
+                        self.locals.items[atom.contained.items[next_i + 1].local_sym_index].n_value - cont_sym.n_value
+                    else
+                        atom_sym.n_value + atom.size - cont_sym.n_value;
+                    const cont_end = cont_sym.n_value + cont_size;
+
+                    contained_node.payload.aliases = inner_aliases.toOwnedSlice();
+                    try nodes.append(contained_node);
+
+                    // Emit the pending relocations that fall before the end of this
+                    // contained symbol. `last_rel` is advanced once per emitted
+                    // relocation so that it stays an absolute index into `relocs.items`
+                    // and no relocation is emitted twice, including when the loop runs
+                    // to completion without hitting the `break`.
+                    for (relocs.items[last_rel..]) |rel| {
+                        if (rel.address >= cont_end) break;
+                        try nodes.append(rel);
+                        last_rel += 1;
+                    }
+
+                    try nodes.append(.{
+                        .address = cont_end,
+                        .tag = .atom_end,
+                        .payload = .{},
+                    });
+                }
+            }
+
+            try nodes.append(.{
+                .address = atom_sym.n_value + atom.size,
+                .tag = .atom_end,
+                .payload = .{},
+            });
+
+            if (atom.next) |next| {
+                atom = next;
+            } else break;
+        }
+
+        try nodes.append(.{
+            .address = sect.addr + sect.size,
+            .tag = .section_end,
+            .payload = .{},
+        });
+    }
+
+    snapshot.nodes = nodes.toOwnedSlice();
+
+    try std.json.stringify(snapshot, .{}, writer);
+    // Close the array; the next snapshot overwrites this byte with ','.
+    try writer.writeByte(']');
+}
diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig
index c32d1f1d8f..07a20ac336 100644
--- a/src/link/MachO/Atom.zig
+++ b/src/link/MachO/Atom.zig
@@ -345,15 +345,9 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment;
const sect = seg.sections.items[sect_id];
const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable;
- const sym_name = try std.fmt.allocPrint(context.allocator, "{s}_{s}_{s}", .{
- context.object.name,
- commands.segmentName(sect),
- commands.sectionName(sect),
- });
- defer context.allocator.free(sym_name);
const local_sym_index = @intCast(u32, context.macho_file.locals.items.len);
try context.macho_file.locals.append(context.allocator, .{
- .n_strx = try context.macho_file.makeString(sym_name),
+ .n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1),
.n_desc = 0,
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index de747eb4c7..f0a299182c 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -174,7 +174,13 @@ pub fn free(self: *Object, allocator: *Allocator, macho_file: *MachO) void {
if (atom.local_sym_index != 0) {
macho_file.locals_free_list.append(allocator, atom.local_sym_index) catch {};
const local = &macho_file.locals.items[atom.local_sym_index];
- local.n_type = 0;
+ local.* = .{
+ .n_strx = 0,
+ .n_type = 0,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = 0,
+ };
atom.local_sym_index = 0;
}
if (atom == last_atom) {
@@ -458,15 +464,9 @@ pub fn parseIntoAtoms(self: *Object, allocator: *Allocator, macho_file: *MachO)
// a temp one, unless we already did that when working out the relocations
// of other atoms.
const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
- const sym_name = try std.fmt.allocPrint(allocator, "{s}_{s}_{s}", .{
- self.name,
- segmentName(sect),
- sectionName(sect),
- });
- defer allocator.free(sym_name);
const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
try macho_file.locals.append(allocator, .{
- .n_strx = try macho_file.makeString(sym_name),
+ .n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
.n_desc = 0,
diff --git a/src/main.zig b/src/main.zig
index fbe388ed47..3fc2be7e30 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -434,6 +434,7 @@ const usage_build_generic =
\\ --verbose-llvm-cpu-features Enable compiler debug output for LLVM CPU features
\\ --debug-log [scope] Enable printing debug/info log messages for scope
\\ --debug-compile-errors Crash with helpful diagnostics at the first compile error
+ \\ --debug-link-snapshot Enable dumping of the linker's state in JSON format
\\
;
@@ -632,6 +633,7 @@ fn buildOutputType(
var major_subsystem_version: ?u32 = null;
var minor_subsystem_version: ?u32 = null;
var wasi_exec_model: ?std.builtin.WasiExecModel = null;
+ var enable_link_snapshots: bool = false;
var system_libs = std.ArrayList([]const u8).init(gpa);
defer system_libs.deinit();
@@ -929,6 +931,12 @@ fn buildOutputType(
} else {
try log_scopes.append(gpa, args[i]);
}
+ } else if (mem.eql(u8, arg, "--debug-link-snapshot")) {
+ if (!build_options.enable_link_snapshots) {
+ std.log.warn("Zig was compiled without linker snapshots enabled (-Dlink-snapshot). --debug-link-snapshot has no effect.", .{});
+ } else {
+ enable_link_snapshots = true;
+ }
} else if (mem.eql(u8, arg, "-fcompiler-rt")) {
want_compiler_rt = true;
} else if (mem.eql(u8, arg, "-fno-compiler-rt")) {
@@ -2139,6 +2147,7 @@ fn buildOutputType(
.subsystem = subsystem,
.wasi_exec_model = wasi_exec_model,
.debug_compile_errors = debug_compile_errors,
+ .enable_link_snapshots = enable_link_snapshots,
}) catch |err| {
fatal("unable to create compilation: {s}", .{@errorName(err)});
};