wasm-linker: support incremental debug info

Although the wasm-linker previously already supported debug information in incremental-mode, this was no longer working as-is with the addition of supporting object-file-parsed debug information. This commit implements the Zig-created debug information structure from scratch which is a lot more robust and also allows being linked with debug information from other object files.
author: Luuk de Gram <luuk@degram.dev> 2022-09-06 20:21:33 +0200
committer: Luuk de Gram <luuk@degram.dev> 2022-09-07 18:59:36 +0200
commit: a8d137d05ae36870d4e896cf5e37b591d9fa219c (patch)
tree: 720dde8e8f150bb6bb2e63e5506ca337e50eee3d /src/link/Wasm.zig
parent: 971327d6e0a2cdcfd1a7695d1dea86dbbebd730e (diff)
download: zig-a8d137d05ae36870d4e896cf5e37b591d9fa219c.tar.gz
zig-a8d137d05ae36870d4e896cf5e37b591d9fa219c.zip
1 files changed, 112 insertions, 44 deletions
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 67f229ca84..0c5f0e810f 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -95,9 +95,10 @@ imports: std.AutoHashMapUnmanaged(SymbolLoc, types.Import) = .{},
 segments: std.ArrayListUnmanaged(Segment) = .{},
 /// Maps a data segment key (such as .rodata) to the index into `segments`.
 data_segments: std.StringArrayHashMapUnmanaged(u32) = .{},
-/// A list of `types.Segment` which provide meta data
-/// about a data symbol such as its name
-segment_info: std.ArrayListUnmanaged(types.Segment) = .{},
+/// A table of `types.Segment` which provide meta data
+/// about a data symbol such as its name where the key is
+/// the segment index, which can be found from `data_segments`
+segment_info: std.AutoArrayHashMapUnmanaged(u32, types.Segment) = .{},
 /// Deduplicated string table for strings used by symbols, imports and exports.
 string_table: StringTable = .{},
 /// Debug information for wasm
@@ -158,6 +159,19 @@ export_names: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .{},
 /// The actual table is populated during `flush`.
 error_table_symbol: ?u32 = null,
 
+// Debug section atoms. These are only set when the current compilation
+// unit contains Zig code. The lifetime of these atoms are extended
+// until the end of the compiler's lifetime. Meaning they're not freed
+// during `flush()` in incremental-mode.
+debug_info_atom: ?*Atom = null,
+debug_line_atom: ?*Atom = null,
+debug_loc_atom: ?*Atom = null,
+debug_ranges_atom: ?*Atom = null,
+debug_abbrev_atom: ?*Atom = null,
+debug_str_atom: ?*Atom = null,
+debug_pubnames_atom: ?*Atom = null,
+debug_pubtypes_atom: ?*Atom = null,
+
 pub const Segment = struct {
     alignment: u32,
     size: u32,
@@ -384,15 +398,16 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Wasm {
 /// and symbols come from the object files instead.
 pub fn initDebugSections(self: *Wasm) !void {
     if (self.dwarf == null) return; // not compiling Zig code, so no need to pre-initialize debug sections
+    assert(self.debug_info_index == null);
     // this will create an Atom and set the index for us.
-    try self.createDebugSectionForIndex(&self.debug_info_index);
-    try self.createDebugSectionForIndex(&self.debug_line_index);
-    try self.createDebugSectionForIndex(&self.debug_loc_index);
-    try self.createDebugSectionForIndex(&self.debug_abbrev_index);
-    try self.createDebugSectionForIndex(&self.debug_ranges_index);
-    try self.createDebugSectionForIndex(&self.debug_str_index);
-    try self.createDebugSectionForIndex(&self.debug_pubnames_index);
-    try self.createDebugSectionForIndex(&self.debug_pubtypes_index);
+    self.debug_info_atom = try self.createDebugSectionForIndex(&self.debug_info_index, ".debug_info");
+    self.debug_line_atom = try self.createDebugSectionForIndex(&self.debug_line_index, ".debug_line");
+    self.debug_loc_atom = try self.createDebugSectionForIndex(&self.debug_loc_index, ".debug_loc");
+    self.debug_abbrev_atom = try self.createDebugSectionForIndex(&self.debug_abbrev_index, ".debug_abbrev");
+    self.debug_ranges_atom = try self.createDebugSectionForIndex(&self.debug_ranges_index, ".debug_ranges");
+    self.debug_str_atom = try self.createDebugSectionForIndex(&self.debug_str_index, ".debug_str");
+    self.debug_pubnames_atom = try self.createDebugSectionForIndex(&self.debug_pubnames_index, ".debug_pubnames");
+    self.debug_pubtypes_atom = try self.createDebugSectionForIndex(&self.debug_pubtypes_index, ".debug_pubtypes");
 }
 
 fn parseInputFiles(self: *Wasm, files: []const []const u8) !void {
@@ -676,7 +691,7 @@ pub fn deinit(self: *Wasm) void {
     for (self.func_types.items) |*func_type| {
         func_type.deinit(gpa);
     }
-    for (self.segment_info.items) |segment_info| {
+    for (self.segment_info.values()) |segment_info| {
         gpa.free(segment_info.name);
     }
     for (self.objects.items) |*object| {
@@ -1364,16 +1379,7 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void {
                 const index = gop.value_ptr.*;
                 self.segments.items[index].size += atom.size;
 
-                // segment indexes can be off by 1 due to also containing a segment
-                // for the code section, so we must check if the existing segment
-                // is larger than that of the code section, and substract the index by 1 in such case.
-                var info_add = if (self.code_section_index) |idx| blk: {
-                    if (idx < index) break :blk @as(u32, 1);
-                    break :blk 0;
-                } else @as(u32, 0);
-                if (self.debug_info_index != null) info_add += 1;
-                if (self.debug_line_index != null) info_add += 1;
-                symbol.index = index - info_add;
+                symbol.index = @intCast(u32, self.segment_info.getIndex(index).?);
                 // segment info already exists, so free its memory
                 self.base.allocator.free(segment_name);
                 break :result index;
@@ -1386,8 +1392,8 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void {
                 });
                 gop.value_ptr.* = index;
 
-                const info_index = @intCast(u32, self.segment_info.items.len);
-                try self.segment_info.append(self.base.allocator, segment_info);
+                const info_index = @intCast(u32, self.segment_info.count());
+                try self.segment_info.put(self.base.allocator, index, segment_info);
                 symbol.index = info_index;
                 break :result index;
             }
@@ -1397,18 +1403,54 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void {
     const segment: *Segment = &self.segments.items[final_index];
     segment.alignment = std.math.max(segment.alignment, atom.alignment);
 
-    if (self.atoms.getPtr(final_index)) |last| {
+    try self.appendAtomAtIndex(final_index, atom);
+}
+
+/// From a given index, append the given `Atom` at the back of the linked list.
+/// Simply inserts it into the map of atoms when it doesn't exist yet.
+pub fn appendAtomAtIndex(self: *Wasm, index: u32, atom: *Atom) !void {
+    if (self.atoms.getPtr(index)) |last| {
         last.*.next = atom;
         atom.prev = last.*;
         last.* = atom;
     } else {
-        try self.atoms.putNoClobber(self.base.allocator, final_index, atom);
+        try self.atoms.putNoClobber(self.base.allocator, index, atom);
     }
 }
 
+/// Allocates debug atoms into their respective debug sections
+/// to merge them with maybe-existing debug atoms from object files.
+fn allocateDebugAtoms(self: *Wasm) !void {
+    if (self.dwarf == null) return;
+
+    const allocAtom = struct {
+        fn f(bin: *Wasm, maybe_index: *?u32, atom: *Atom) !void {
+            const index = maybe_index.* orelse idx: {
+                const index = @intCast(u32, bin.segments.items.len);
+                try bin.appendDummySegment();
+                maybe_index.* = index;
+                break :idx index;
+            };
+            atom.size = @intCast(u32, atom.code.items.len);
+            bin.symbols.items[atom.sym_index].index = index;
+            try bin.appendAtomAtIndex(index, atom);
+        }
+    }.f;
+
+    try allocAtom(self, &self.debug_info_index, self.debug_info_atom.?);
+    try allocAtom(self, &self.debug_line_index, self.debug_line_atom.?);
+    try allocAtom(self, &self.debug_loc_index, self.debug_loc_atom.?);
+    try allocAtom(self, &self.debug_str_index, self.debug_str_atom.?);
+    try allocAtom(self, &self.debug_ranges_index, self.debug_ranges_atom.?);
+    try allocAtom(self, &self.debug_abbrev_index, self.debug_abbrev_atom.?);
+    try allocAtom(self, &self.debug_pubnames_index, self.debug_pubnames_atom.?);
+    try allocAtom(self, &self.debug_pubtypes_index, self.debug_pubtypes_atom.?);
+}
+
 fn allocateAtoms(self: *Wasm) !void {
     // first sort the data segments
     try sortDataSegments(self);
+    try allocateDebugAtoms(self);
 
     var it = self.atoms.iterator();
     while (it.next()) |entry| {
@@ -1426,7 +1468,7 @@ fn allocateAtoms(self: *Wasm) !void {
                 atom.size,
             });
             offset += atom.size;
-            self.symbol_atom.putAssumeCapacity(atom.symbolLoc(), atom); // Update atom pointers
+            try self.symbol_atom.put(self.base.allocator, atom.symbolLoc(), atom); // Update atom pointers
             atom = atom.next orelse break;
         }
         segment.size = std.mem.alignForwardGeneric(u32, offset, segment.alignment);
@@ -1989,20 +2031,35 @@ fn populateErrorNameTable(self: *Wasm) !void {
 /// From a given index variable, creates a new debug section.
 /// This initializes the index, appends a new segment,
 /// and finally, creates a managed `Atom`.
-pub fn createDebugSectionForIndex(self: *Wasm, index: *?u32) !void {
+pub fn createDebugSectionForIndex(self: *Wasm, index: *?u32, name: []const u8) !*Atom {
     const new_index = @intCast(u32, self.segments.items.len);
     index.* = new_index;
     try self.appendDummySegment();
+    // _ = index;
+
+    const sym_index = self.symbols_free_list.popOrNull() orelse idx: {
+        const tmp_index = @intCast(u32, self.symbols.items.len);
+        _ = try self.symbols.addOne(self.base.allocator);
+        break :idx tmp_index;
+    };
+    self.symbols.items[sym_index] = .{
+        .tag = .section,
+        .name = try self.string_table.put(self.base.allocator, name),
+        .index = 0,
+        .flags = @enumToInt(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
+    };
 
     const atom = try self.base.allocator.create(Atom);
     atom.* = Atom.empty;
     atom.alignment = 1; // debug sections are always 1-byte-aligned
+    atom.sym_index = sym_index;
     try self.managed_atoms.append(self.base.allocator, atom);
-    try self.atoms.put(self.base.allocator, new_index, atom);
+    try self.symbol_atom.put(self.base.allocator, atom.symbolLoc(), atom);
+    return atom;
 }
 
 fn resetState(self: *Wasm) void {
-    for (self.segment_info.items) |*segment_info| {
+    for (self.segment_info.values()) |segment_info| {
         self.base.allocator.free(segment_info.name);
     }
     if (self.base.options.module) |mod| {
@@ -2029,6 +2086,12 @@ fn resetState(self: *Wasm) void {
     self.code_section_index = null;
     self.debug_info_index = null;
     self.debug_line_index = null;
+    self.debug_loc_index = null;
+    self.debug_str_index = null;
+    self.debug_ranges_index = null;
+    self.debug_abbrev_index = null;
+    self.debug_pubnames_index = null;
+    self.debug_pubtypes_index = null;
 }
 
 pub fn flush(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) !void {
@@ -2508,26 +2571,31 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod
         var debug_bytes = std.ArrayList(u8).init(self.base.allocator);
         defer debug_bytes.deinit();
 
-        const debug_sections = .{
-            .{ ".debug_info", self.debug_info_index },
-            .{ ".debug_pubtypes", self.debug_pubtypes_index },
-            .{ ".debug_abbrev", self.debug_abbrev_index },
-            .{ ".debug_line", self.debug_line_index },
-            .{ ".debug_str", self.debug_str_index },
-            .{ ".debug_pubnames", self.debug_pubnames_index },
-            .{ ".debug_loc", self.debug_loc_index },
-            .{ ".debug_ranges", self.debug_ranges_index },
+        const DebugSection = struct {
+            name: []const u8,
+            index: ?u32,
+        };
+
+        const debug_sections: []const DebugSection = &.{
+            .{ .name = ".debug_info", .index = self.debug_info_index },
+            .{ .name = ".debug_pubtypes", .index = self.debug_pubtypes_index },
+            .{ .name = ".debug_abbrev", .index = self.debug_abbrev_index },
+            .{ .name = ".debug_line", .index = self.debug_line_index },
+            .{ .name = ".debug_str", .index = self.debug_str_index },
+            .{ .name = ".debug_pubnames", .index = self.debug_pubnames_index },
+            .{ .name = ".debug_loc", .index = self.debug_loc_index },
+            .{ .name = ".debug_ranges", .index = self.debug_ranges_index },
         };
 
-        inline for (debug_sections) |item| {
-            if (item[1]) |index| {
+        for (debug_sections) |item| {
+            if (item.index) |index| {
                 var atom = self.atoms.get(index).?.getFirst();
                 while (true) {
                     atom.resolveRelocs(self);
                     try debug_bytes.appendSlice(atom.code.items);
                     atom = atom.next orelse break;
                 }
-                try emitDebugSection(file, debug_bytes.items, item[0]);
+                try emitDebugSection(file, debug_bytes.items, item.name);
                 debug_bytes.clearRetainingCapacity();
             }
         }
@@ -3242,8 +3310,8 @@ fn emitSegmentInfo(self: *Wasm, file: fs.File, arena: Allocator) !void {
     var payload = std.ArrayList(u8).init(arena);
     const writer = payload.writer();
     try leb.writeULEB128(file.writer(), @enumToInt(types.SubsectionType.WASM_SEGMENT_INFO));
-    try leb.writeULEB128(writer, @intCast(u32, self.segment_info.items.len));
-    for (self.segment_info.items) |segment_info| {
+    try leb.writeULEB128(writer, @intCast(u32, self.segment_info.count()));
+    for (self.segment_info.values()) |segment_info| {
         log.debug("Emit segment: {s} align({d}) flags({b})", .{
             segment_info.name,
             @ctz(segment_info.alignment),
author	Luuk de Gram <luuk@degram.dev>	2022-09-06 20:21:33 +0200
committer	Luuk de Gram <luuk@degram.dev>	2022-09-07 18:59:36 +0200
commit	a8d137d05ae36870d4e896cf5e37b591d9fa219c (patch)
tree	720dde8e8f150bb6bb2e63e5506ca337e50eee3d /src/link/Wasm.zig
parent	971327d6e0a2cdcfd1a7695d1dea86dbbebd730e (diff)
download	zig-a8d137d05ae36870d4e896cf5e37b591d9fa219c.tar.gz zig-a8d137d05ae36870d4e896cf5e37b591d9fa219c.zip