From a35f156cf60ed3d8095c15c4ab26aee267761a56 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 31 Aug 2022 19:55:39 +0200 Subject: coff: re-enable default entrypoint for Windows --- src/arch/x86_64/CodeGen.zig | 24 +++++++++++++++++++++--- src/link.zig | 2 +- src/link/Coff.zig | 30 +++++++++++++++++++++++++++--- 3 files changed, 49 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e5d47e589a..d7294e9732 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3999,7 +3999,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. .data = undefined, }); } - } else if (self.bin_file.cast(link.File.Coff)) |_| { + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { if (self.air.value(callee)) |func_value| { if (func_value.castTag(.function)) |func_payload| { const func = func_payload.data; @@ -4015,8 +4015,26 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. }), .data = undefined, }); - } else if (func_value.castTag(.extern_fn)) |_| { - return self.fail("TODO implement calling extern functions", .{}); + } else if (func_value.castTag(.extern_fn)) |func_payload| { + const extern_fn = func_payload.data; + const decl_name = mod.declPtr(extern_fn.owner_decl).name; + if (extern_fn.lib_name) |lib_name| { + log.debug("TODO enforce that '{s}' is expected in '{s}' library", .{ + decl_name, + lib_name, + }); + } + const sym_index = try coff_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + _ = try self.addInst(.{ + .tag = .call_extern, + .ops = undefined, + .data = .{ + .relocation = .{ + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.coff.sym_index, + .sym_index = sym_index, + }, + }, + }); } else { return self.fail("TODO implement calling bitcasted functions", .{}); } diff --git a/src/link.zig b/src/link.zig index a8845a0d57..986f4e81b6 100644 --- a/src/link.zig +++ b/src/link.zig @@ -473,7 +473,7 @@ pub const File = struct { log.debug("getGlobalSymbol '{s}'", .{name}); switch (base.tag) { // zig fmt: off - .coff => unreachable, + .coff => return @fieldParentPtr(Coff, "base", base).getGlobalSymbol(name), .elf => unreachable, .macho => return @fieldParentPtr(MachO, "base", base).getGlobalSymbol(name), .plan9 => unreachable, diff --git a/src/link/Coff.zig b/src/link/Coff.zig index e302571671..bf6a32431c 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -596,7 +596,7 @@ fn allocateSymbol(self: *Coff) !u32 { self.locals.items[index] = .{ .name = [_]u8{0} ** 8, .value = 0, - .section_number = @intToEnum(coff.SectionNumber, 0), + .section_number = .UNDEFINED, .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, .storage_class = .NULL, .number_of_aux_symbols = 0, @@ -1027,7 +1027,7 @@ pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { log.debug(" adding GOT index {d} to free list (target local@{d})", .{ got_index, sym_index }); } - self.locals.items[sym_index].section_number = @intToEnum(coff.SectionNumber, 0); + self.locals.items[sym_index].section_number = .UNDEFINED; _ = self.atom_by_index_table.remove(sym_index); decl.link.coff.sym_index = 0; } @@ -1268,6 +1268,30 @@ pub fn getDeclVAddr( @panic("TODO getDeclVAddr"); } +pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { + const gpa = self.base.allocator; + const sym_name = try gpa.dupe(u8, name); + const global_index = @intCast(u32, self.globals.values().len); + _ = global_index; + const gop = try self.globals.getOrPut(gpa, sym_name); + 
defer if (gop.found_existing) gpa.free(sym_name); + + if (gop.found_existing) { + // TODO audit this: can we ever reference anything from outside the Zig module? + assert(gop.value_ptr.file == null); + return gop.value_ptr.sym_index; + } + + const sym_index = try self.allocateSymbol(); + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + const sym = self.getSymbolPtr(sym_loc); + try self.setSymbolName(sym, sym_name); + sym.storage_class = .EXTERNAL; + gop.value_ptr.* = sym_loc; + + return sym_index; +} + pub fn updateDeclLineNumber(self: *Coff, module: *Module, decl: *Module.Decl) !void { _ = self; _ = module; @@ -1614,7 +1638,7 @@ inline fn getSizeOfImage(self: Coff) u32 { /// Returns symbol location corresponding to the set entrypoint (if any). pub fn getEntryPoint(self: Coff) ?SymbolWithLoc { - const entry_name = self.base.options.entry orelse "_start"; // TODO this is incomplete + const entry_name = self.base.options.entry orelse "wWinMainCRTStartup"; // TODO this is incomplete return self.globals.get(entry_name); } -- cgit v1.2.3 From 51fba37af70283427a7ef5d2f2fd39f97aaa1e35 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 31 Aug 2022 20:03:41 +0200 Subject: coff: add relocation for call_extern --- src/arch/x86_64/Emit.zig | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src') diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 12f3e9118f..055216e2bb 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -1157,6 +1157,18 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .length = 2, .@"type" = @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), }); + } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { + // Add relocation to the decl. + const atom = coff_file.atom_by_index_table.get(relocation.atom_index).?; + try atom.addRelocation(coff_file, .{ + .@"type" = .direct, + .target = .{ .sym_index = relocation.sym_index, .file = null }, + .offset = offset, + .addend = 0, + .pcrel = true, + .length = 2, + .prev_vaddr = atom.getSymbol(coff_file).value, + }); } else { return emit.fail("TODO implement call_extern for linking backends different than MachO", .{}); } -- cgit v1.2.3 From 1ab149c5fc474e73cb52872e11cbd2b916961ede Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 31 Aug 2022 21:15:23 +0200 Subject: coff: create import atoms and matching bindings --- src/link/Coff.zig | 127 ++++++++++++++++++++++++++++++++++++++++++++++--- src/link/Coff/Atom.zig | 10 ++++ 2 files changed, 131 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index bf6a32431c..a690c7cf63 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -51,9 +51,11 @@ got_section_index: ?u16 = null, rdata_section_index: ?u16 = null, data_section_index: ?u16 = null, reloc_section_index: ?u16 = null, +idata_section_index: ?u16 = null, locals: std.ArrayListUnmanaged(coff.Symbol) = .{}, globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, bool) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -63,6 +65,9 @@ strtab_offset: ?u32 = null, got_entries: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, +imports_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +imports_table_free_list: std.ArrayListUnmanaged(u32) = .{}, + /// Virtual address of the entry point procedure relative to image base. 
entry_addr: ?u32 = null, @@ -109,6 +114,11 @@ relocs: RelocTable = .{}, /// this will be a table indexed by index into the list of Atoms. base_relocs: BaseRelocationTable = .{}, +/// A table of bindings indexed by the owning them `Atom`. +/// Note that once we refactor `Atom`'s lifetime and ownership rules, +/// this will be a table indexed by index into the list of Atoms. +bindings: BindingTable = .{}, + pub const Reloc = struct { @"type": enum { got, @@ -124,6 +134,7 @@ pub const Reloc = struct { const RelocTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Reloc)); const BaseRelocationTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(u32)); +const BindingTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(SymbolWithLoc)); const UnnamedConstTable = std.AutoHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(*Atom)); const default_file_alignment: u16 = 0x200; @@ -269,10 +280,13 @@ pub fn deinit(self: *Coff) void { self.locals.deinit(gpa); self.globals.deinit(gpa); + self.unresolved.deinit(gpa); self.locals_free_list.deinit(gpa); self.strtab.deinit(gpa); self.got_entries.deinit(gpa); self.got_entries_free_list.deinit(gpa); + self.imports_table.deinit(gpa); + self.imports_table_free_list.deinit(gpa); self.decls.deinit(gpa); self.atom_by_index_table.deinit(gpa); @@ -299,6 +313,14 @@ pub fn deinit(self: *Coff) void { } self.base_relocs.deinit(gpa); } + + { + var it = self.bindings.valueIterator(); + while (it.next()) |bindings| { + bindings.deinit(gpa); + } + self.bindings.deinit(gpa); + } } fn populateMissingMetadata(self: *Coff) !void { @@ -420,7 +442,7 @@ fn populateMissingMetadata(self: *Coff) !void { .number_of_linenumbers = 0, .flags = .{ .CNT_INITIALIZED_DATA = 1, - .MEM_PURGEABLE = 1, + .MEM_DISCARDABLE = 1, .MEM_READ = 1, }, }; @@ -428,6 +450,30 @@ fn populateMissingMetadata(self: *Coff) !void { try self.sections.append(gpa, .{ .header = header }); } + if (self.idata_section_index == null) { + self.idata_section_index = @intCast(u16, self.sections.slice().len); + const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); + const off = self.findFreeSpace(file_size, self.page_size); + log.debug("found .idata free space 0x{x} to 0x{x}", .{ off, off + file_size }); + var header = coff.SectionHeader{ + .name = undefined, + .virtual_size = file_size, + .virtual_address = off, + .size_of_raw_data = file_size, + .pointer_to_raw_data = off, + .pointer_to_relocations = 0, + .pointer_to_linenumbers = 0, + .number_of_relocations = 0, + .number_of_linenumbers = 0, + .flags = .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }, + }; + try self.setSectionName(&header, ".idata"); + try self.sections.append(gpa, .{ .header = header }); + } + if (self.strtab_offset == null) { try self.strtab.buffer.append(gpa, 0); self.strtab_offset = self.findFreeSpace(@intCast(u32, self.strtab.len()), 1); @@ -626,6 +672,27 @@ pub fn allocateGotEntry(self: *Coff, target: SymbolWithLoc) !u32 { return index; } +pub fn allocateImportEntry(self: *Coff, target: SymbolWithLoc) !u32 { + const gpa = self.base.allocator; + try self.imports_table.ensureUnusedCapacity(gpa, 1); + const index: u32 = blk: { + if (self.imports_table_free_list.popOrNull()) |index| { + log.debug(" (reusing import entry index {d})", .{index}); + if (self.imports_table.getIndex(target)) |existing| { + assert(existing == index); + } + break :blk index; + } else { + log.debug(" (allocating import entry at index {d})", .{self.imports_table.keys().len}); + const index = 
@intCast(u32, self.imports_table.keys().len); + self.imports_table.putAssumeCapacityNoClobber(target, 0); + break :blk index; + } + }; + self.imports_table.keys()[index] = target; + return index; +} + fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { const gpa = self.base.allocator; const atom = try gpa.create(Atom); @@ -666,6 +733,32 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { return atom; } +fn createImportAtom(self: *Coff, target: SymbolWithLoc) !*Atom { + const gpa = self.base.allocator; + const atom = try gpa.create(Atom); + errdefer gpa.destroy(atom); + atom.* = Atom.empty; + atom.sym_index = try self.allocateSymbol(); + atom.size = @sizeOf(u64); + atom.alignment = @alignOf(u64); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); + self.imports_table.getPtr(target).?.* = atom.sym_index; + + const sym = atom.getSymbolPtr(self); + sym.section_number = @intToEnum(coff.SectionNumber, self.idata_section_index.? + 1); + sym.value = try self.allocateAtom(atom, atom.size, atom.alignment); + + log.debug("allocated import atom at 0x{x}", .{sym.value}); + + const target_sym = self.getSymbol(target); + assert(target_sym.section_number == .UNDEFINED); + try atom.addBinding(self, target); + + return atom; +} + fn growAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u32 { const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u32, sym.value, alignment) == sym.value; @@ -691,7 +784,7 @@ fn writeAtom(self: *Coff, atom: *Atom, code: []const u8) !void { try self.resolveRelocs(atom); } -fn writeGotAtom(self: *Coff, atom: *Atom) !void { +fn writePtrWidthAtom(self: *Coff, atom: *Atom) !void { switch (self.ptr_width) { .p32 => { var buffer: [@sizeOf(u32)]u8 = [_]u8{0} ** @sizeOf(u32); @@ -718,7 +811,12 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { const got_atom = self.getGotAtomForSymbol(reloc.target) orelse continue; break :blk got_atom.getSymbol(self).value; }, - .direct => self.getSymbol(reloc.target).value, + .direct => blk: { + if (self.getImportAtomForSymbol(reloc.target)) |import_atom| { + break :blk import_atom.getSymbol(self).value; + } + break :blk self.getSymbol(reloc.target).value; + }, }; const target_vaddr_with_addend = target_vaddr + reloc.addend; @@ -971,7 +1069,7 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, sym.value = vaddr; log.debug(" (updating GOT entry)", .{}); const got_atom = self.getGotAtomForSymbol(.{ .sym_index = atom.sym_index, .file = null }).?; - try self.writeGotAtom(got_atom); + try self.writePtrWidthAtom(got_atom); } } else if (code_len < atom.size) { self.shrinkAtom(atom, code_len); @@ -992,7 +1090,7 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; _ = try self.allocateGotEntry(got_target); const got_atom = try self.createGotAtom(got_target); - try self.writeGotAtom(got_atom); + try self.writePtrWidthAtom(got_atom); } try self.writeAtom(atom, code); @@ -1227,6 +1325,16 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod sub_prog_node.activate(); defer sub_prog_node.end(); + while (self.unresolved.popOrNull()) |entry| { + assert(entry.value); // We only expect imports generated by the incremental linker for now. 
+ const global = self.globals.values()[entry.key]; + if (self.imports_table.contains(global)) continue; + + _ = try self.allocateImportEntry(global); + const import_atom = try self.createImportAtom(global); + try self.writePtrWidthAtom(import_atom); + } + if (build_options.enable_logging) { self.logSymtab(); } @@ -1272,7 +1380,6 @@ pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { const gpa = self.base.allocator; const sym_name = try gpa.dupe(u8, name); const global_index = @intCast(u32, self.globals.values().len); - _ = global_index; const gop = try self.globals.getOrPut(gpa, sym_name); defer if (gop.found_existing) gpa.free(sym_name); @@ -1288,6 +1395,7 @@ pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { try self.setSymbolName(sym, sym_name); sym.storage_class = .EXTERNAL; gop.value_ptr.* = sym_loc; + try self.unresolved.putNoClobber(gpa, global_index, true); return sym_index; } @@ -1676,6 +1784,13 @@ pub fn getGotAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { return self.atom_by_index_table.get(got_index); } +/// Returns import atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. +pub fn getImportAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { + const imports_index = self.imports_table.get(sym_loc) orelse return null; + return self.atom_by_index_table.get(imports_index); +} + fn setSectionName(self: *Coff, header: *coff.SectionHeader, name: []const u8) !void { if (name.len <= 8) { mem.copy(u8, &header.name, name); diff --git a/src/link/Coff/Atom.zig b/src/link/Coff/Atom.zig index a7608d9a34..1d6e511f3b 100644 --- a/src/link/Coff/Atom.zig +++ b/src/link/Coff/Atom.zig @@ -118,3 +118,13 @@ pub fn addBaseRelocation(self: *Atom, coff_file: *Coff, offset: u32) !void { } try gop.value_ptr.append(gpa, offset); } + +pub fn addBinding(self: *Atom, coff_file: *Coff, target: SymbolWithLoc) !void { + const gpa = coff_file.base.allocator; + log.debug(" (adding binding to target %{d} in %{d})", .{ target.sym_index, self.sym_index }); + const gop = try coff_file.bindings.getOrPut(gpa, self); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + try gop.value_ptr.append(gpa, target); +} -- cgit v1.2.3 From 0ebeb58d91b23acbd2ad3a168af19459af63a8f6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 31 Aug 2022 21:49:51 +0200 Subject: coff: populate import address table dir --- src/link/Coff.zig | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index a690c7cf63..b57307c862 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -1346,6 +1346,7 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod } } try self.writeBaseRelocations(); + try self.writeImportTable(); if (self.getEntryPoint()) |entry_sym_loc| { self.entry_addr = self.getSymbol(entry_sym_loc).value; @@ -1484,6 +1485,22 @@ fn writeBaseRelocations(self: *Coff) !void { }; } +fn writeImportTable(self: *Coff) !void { + const gpa = self.base.allocator; + _ = gpa; + + const section = self.sections.get(self.idata_section_index.?); + const iat_rva = section.header.virtual_address; + const iat_size = blk: { + const last_atom = section.last_atom.?; + break :blk last_atom.getSymbol(self).value + last_atom.size - iat_rva; + }; + self.data_directories[@enumToInt(coff.DirectoryEntry.IAT)] = .{ + .virtual_address = iat_rva, + .size = iat_size, + }; +} + fn writeStrtab(self: *Coff) !void { const allocated_size = self.allocatedSize(self.strtab_offset.?); const needed_size = 
@intCast(u32, self.strtab.len()); -- cgit v1.2.3 From aac4c1d3b225ff4cd7138d9aae599c9540c7f04e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 00:24:06 +0200 Subject: coff: fix contents of IAT, and ensure codegen loads addr into reg As far as I can see, unlike with MachO, we don't have any stubs helper routines available and need to load a bound pointer into a register to then call it. --- src/arch/x86_64/CodeGen.zig | 76 +++++++++++++++++++++++++++++++--------- src/arch/x86_64/Emit.zig | 5 +-- src/arch/x86_64/Mir.zig | 1 + src/link/Coff.zig | 85 ++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 140 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d7294e9732..e6386d3ac7 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -137,6 +137,7 @@ pub const MCValue = union(enum) { /// If the type is a pointer, it means the pointer is referenced indirectly via GOT. /// When lowered, linker will emit a relocation of type X86_64_RELOC_GOT. got_load: u32, + imports_load: u32, /// The value is in memory referenced directly via symbol index. /// If the type is a pointer, it means the pointer is referenced directly via symbol index. /// When lowered, linker will emit a relocation of type X86_64_RELOC_SIGNED. @@ -156,6 +157,7 @@ pub const MCValue = union(enum) { .ptr_stack_offset, .direct_load, .got_load, + .imports_load, => true, else => false, }; @@ -2274,6 +2276,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(addr_reg, Type.usize, array); }, @@ -2618,6 +2621,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo .memory, .got_load, .direct_load, + .imports_load, => { const reg = try self.copyToTmpRegister(ptr_ty, ptr); try self.load(dst_mcv, .{ .register = reg }, ptr_ty); @@ -2655,6 +2659,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue switch (ptr) { .got_load, .direct_load, + .imports_load, => |sym_index| { const abi_size = @intCast(u32, ptr_ty.abiSize(self.target.*)); const mod = self.bin_file.options.module.?; @@ -2666,6 +2671,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue const flags: u2 = switch (ptr) { .got_load => 0b00, .direct_load => 0b01, + .imports_load => 0b10, else => unreachable, }; _ = try self.addInst(.{ @@ -2763,6 +2769,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .got_load, .direct_load, + .imports_load, .memory, .stack_offset, => { @@ -2783,6 +2790,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .got_load, .direct_load, + .imports_load, .memory, => { const value_lock: ?RegisterLock = switch (value) { @@ -2854,6 +2862,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type }, .got_load, .direct_load, + .imports_load, .memory, => { if (abi_size <= 8) { @@ -3565,6 +3574,7 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu .memory, .got_load, .direct_load, + .imports_load, .eflags, => { assert(abi_size <= 8); @@ -3650,7 +3660,10 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu => { return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO 
implement x86 ADD/SUB/CMP source symbol at index in linker", .{}); }, .eflags => { @@ -3661,7 +3674,10 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu .memory => { return self.fail("TODO implement x86 ADD/SUB/CMP destination memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 ADD/SUB/CMP destination symbol at index", .{}); }, } @@ -3729,7 +3745,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .memory => { return self.fail("TODO implement x86 multiply source memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 multiply source symbol at index in linker", .{}); }, .eflags => { @@ -3773,7 +3792,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .memory, .stack_offset => { return self.fail("TODO implement x86 multiply source memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 multiply source symbol at index in linker", .{}); }, .eflags => { @@ -3784,7 +3806,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .memory => { return self.fail("TODO implement x86 multiply destination memory", .{}); }, - .got_load, .direct_load => { + .got_load, + .direct_load, + .imports_load, + => { return self.fail("TODO implement x86 multiply destination symbol at index in linker", .{}); }, } @@ -3948,6 +3973,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. .memory => unreachable, .got_load => unreachable, .direct_load => unreachable, + .imports_load => unreachable, .eflags => unreachable, .register_overflow => unreachable, } @@ -4025,15 +4051,16 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
}); } const sym_index = try coff_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + try self.genSetReg(Type.initTag(.usize), .rax, .{ + .imports_load = sym_index, + }); _ = try self.addInst(.{ - .tag = .call_extern, - .ops = undefined, - .data = .{ - .relocation = .{ - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.coff.sym_index, - .sym_index = sym_index, - }, - }, + .tag = .call, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = .rax, + .flags = 0b01, + }), + .data = undefined, }); } else { return self.fail("TODO implement calling bitcasted functions", .{}); @@ -4443,7 +4470,11 @@ fn genVarDbgInfo( leb128.writeILEB128(dbg_info.writer(), -off) catch unreachable; dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2); }, - .memory, .got_load, .direct_load => { + .memory, + .got_load, + .direct_load, + .imports_load, + => { const ptr_width = @intCast(u8, @divExact(self.target.cpu.arch.ptrBitWidth(), 8)); const is_ptr = switch (tag) { .dbg_var_ptr => true, @@ -4474,7 +4505,10 @@ fn genVarDbgInfo( try dbg_info.append(DW.OP.deref); } switch (mcv) { - .got_load, .direct_load => |index| try dw.addExprlocReloc(index, offset, is_ptr), + .got_load, + .direct_load, + .imports_load, + => |index| try dw.addExprlocReloc(index, offset, is_ptr), else => {}, } }, @@ -5474,6 +5508,7 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE .memory, .direct_load, .got_load, + .imports_load, => { if (abi_size <= 8) { const reg = try self.copyToTmpRegister(ty, mcv); @@ -5721,6 +5756,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl .memory, .got_load, .direct_load, + .imports_load, => { if (abi_size <= 8) { const reg = try self.copyToTmpRegister(ty, mcv); @@ -5848,6 +5884,7 @@ fn genInlineMemcpy( .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(dst_addr_reg, Type.usize, dst_ptr); }, @@ -5883,6 +5920,7 @@ fn genInlineMemcpy( .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(src_addr_reg, Type.usize, src_ptr); }, @@ -6021,6 +6059,7 @@ fn genInlineMemset( .memory, .got_load, .direct_load, + .imports_load, => { try self.loadMemPtrIntoRegister(addr_reg, Type.usize, dst_ptr); }, @@ -6261,6 +6300,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }, .direct_load, .got_load, + .imports_load, => { switch (ty.zigTypeTag()) { .Float => { @@ -6655,7 +6695,11 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { // TODO Is this the only condition for pointer dereference for memcpy? 
const src: MCValue = blk: { switch (src_ptr) { - .got_load, .direct_load, .memory => { + .got_load, + .direct_load, + .imports_load, + .memory, + => { const reg = try self.register_manager.allocReg(null, gp); try self.loadMemPtrIntoRegister(reg, src_ty, src_ptr); _ = try self.addInst(.{ diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 055216e2bb..a0f34d5732 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -985,8 +985,8 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const relocation = emit.mir.instructions.items(.data)[inst].relocation; switch (ops.flags) { - 0b00, 0b01 => {}, - else => return emit.fail("TODO unused LEA PIC variants 0b10 and 0b11", .{}), + 0b00, 0b01, 0b10 => {}, + else => return emit.fail("TODO unused LEA PIC variant 0b11", .{}), } // lea reg1, [rip + reloc] @@ -1024,6 +1024,7 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .@"type" = switch (ops.flags) { 0b00 => .got, 0b01 => .direct, + 0b10 => .imports, else => unreachable, }, .target = .{ .sym_index = relocation.sym_index, .file = null }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 71aecc5e85..b2e0f204eb 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -180,6 +180,7 @@ pub const Inst = struct { /// ops flags: form: /// 0b00 reg1, [rip + reloc] // via GOT PIC /// 0b01 reg1, [rip + reloc] // direct load PIC + /// 0b10 reg1, [rip + reloc] // via imports table PIC /// Notes: /// * `Data` contains `relocation` lea_pic, diff --git a/src/link/Coff.zig b/src/link/Coff.zig index b57307c862..9f3ccc069c 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -123,6 +123,7 @@ pub const Reloc = struct { @"type": enum { got, direct, + imports, }, target: SymbolWithLoc, offset: u32, @@ -812,18 +813,18 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { break :blk got_atom.getSymbol(self).value; }, .direct => blk: { - if (self.getImportAtomForSymbol(reloc.target)) |import_atom| { - break :blk import_atom.getSymbol(self).value; - } break :blk self.getSymbol(reloc.target).value; }, + .imports => blk: { + const import_atom = self.getImportAtomForSymbol(reloc.target) orelse continue; + break :blk import_atom.getSymbol(self).value; + }, }; const target_vaddr_with_addend = target_vaddr + reloc.addend; - if (target_vaddr_with_addend == reloc.prev_vaddr) continue; log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ - reloc.offset, + source_sym.value + reloc.offset, target_vaddr_with_addend, self.getSymbolName(reloc.target), @tagName(reloc.@"type"), @@ -833,7 +834,7 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { const source_vaddr = source_sym.value + reloc.offset; const disp = target_vaddr_with_addend - source_vaddr - 4; try self.base.file.?.pwriteAll(mem.asBytes(&@intCast(u32, disp)), file_offset + reloc.offset); - return; + continue; } switch (self.ptr_width) { @@ -1345,8 +1346,8 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod try self.resolveRelocs(atom.*); } } - try self.writeBaseRelocations(); try self.writeImportTable(); + try self.writeBaseRelocations(); if (self.getEntryPoint()) |entry_sym_loc| { self.entry_addr = self.getSymbol(entry_sym_loc).value; @@ -1487,14 +1488,80 @@ fn writeBaseRelocations(self: *Coff) !void { fn writeImportTable(self: *Coff) !void { const gpa = self.base.allocator; - _ = gpa; const section = self.sections.get(self.idata_section_index.?); const iat_rva = section.header.virtual_address; const iat_size = blk: { const last_atom = 
section.last_atom.?; - break :blk last_atom.getSymbol(self).value + last_atom.size - iat_rva; + break :blk last_atom.getSymbol(self).value + last_atom.size * 2 - iat_rva; // account for sentinel zero pointer + }; + + const dll_name = "KERNEL32.dll"; + + var import_dir_entry = coff.ImportDirectoryEntry{ + .import_lookup_table_rva = @sizeOf(coff.ImportDirectoryEntry) * 2, + .time_date_stamp = 0, + .forwarder_chain = 0, + .name_rva = 0, + .import_address_table_rva = iat_rva, + }; + + // TODO: we currently assume there's only one (implicit) DLL - ntdll + var lookup_table = std.ArrayList(coff.ImportLookupEntry64.ByName).init(gpa); + defer lookup_table.deinit(); + + var names_table = std.ArrayList(u8).init(gpa); + defer names_table.deinit(); + + // TODO: check if import is still valid + for (self.imports_table.keys()) |target| { + const target_name = self.getSymbolName(target); + const start = names_table.items.len; + mem.writeIntLittle(u16, try names_table.addManyAsArray(2), 0); // TODO: currently, hint is set to 0 as we haven't yet parsed any DLL + try names_table.appendSlice(target_name); + try names_table.append(0); + const end = names_table.items.len; + if (!mem.isAlignedGeneric(usize, end - start, @sizeOf(u16))) { + try names_table.append(0); + } + try lookup_table.append(.{ .name_table_rva = @intCast(u31, start) }); + } + try lookup_table.append(.{ .name_table_rva = 0 }); // the sentinel + + const dir_entry_size = @sizeOf(coff.ImportDirectoryEntry) + lookup_table.items.len * @sizeOf(coff.ImportLookupEntry64.ByName) + names_table.items.len + dll_name.len + 1; + const needed_size = iat_size + dir_entry_size + @sizeOf(coff.ImportDirectoryEntry); + const sect_capacity = self.allocatedSize(section.header.pointer_to_raw_data); + assert(needed_size < sect_capacity); // TODO: implement expanding .idata section + + // Fixup offsets + const base_rva = iat_rva + iat_size; + import_dir_entry.import_lookup_table_rva += base_rva; + import_dir_entry.name_rva = @intCast(u32, base_rva + dir_entry_size + @sizeOf(coff.ImportDirectoryEntry) - dll_name.len - 1); + + for (lookup_table.items[0 .. 
lookup_table.items.len - 1]) |*lk| { + lk.name_table_rva += @intCast(u31, base_rva + @sizeOf(coff.ImportDirectoryEntry) * 2 + lookup_table.items.len * @sizeOf(coff.ImportLookupEntry64.ByName)); + } + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacity(dir_entry_size + @sizeOf(coff.ImportDirectoryEntry)); + buffer.appendSliceAssumeCapacity(mem.asBytes(&import_dir_entry)); + buffer.appendNTimesAssumeCapacity(0, @sizeOf(coff.ImportDirectoryEntry)); // the sentinel; TODO: I think doing all of the above on bytes directly might be cleaner + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(lookup_table.items)); + buffer.appendSliceAssumeCapacity(names_table.items); + buffer.appendSliceAssumeCapacity(dll_name); + buffer.appendAssumeCapacity(0); + + try self.base.file.?.pwriteAll(buffer.items, section.header.pointer_to_raw_data + iat_size); + // Override the IAT atoms + // TODO: we should rewrite only dirtied atoms, but that's for way later + try self.base.file.?.pwriteAll(mem.sliceAsBytes(lookup_table.items), section.header.pointer_to_raw_data); + + self.data_directories[@enumToInt(coff.DirectoryEntry.IMPORT)] = .{ + .virtual_address = iat_rva + iat_size, + .size = @intCast(u32, @sizeOf(coff.ImportDirectoryEntry) * 2), }; + self.data_directories[@enumToInt(coff.DirectoryEntry.IAT)] = .{ .virtual_address = iat_rva, .size = iat_size, -- cgit v1.2.3 From a19e6adbf90771890ecdbb52d6dafab1943e4cc4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 12:24:55 +0200 Subject: x86_64: add support for Win64/C calling convention --- src/arch/x86_64/CodeGen.zig | 20 +++++------- src/arch/x86_64/Emit.zig | 7 +++-- src/arch/x86_64/Mir.zig | 65 +++++++++++++++++++-------------------- src/arch/x86_64/abi.zig | 74 ++++++++++++++++++++++++++++++++++++--------- 4 files changed, 103 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e6386d3ac7..5e404d00bd 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -32,11 +32,6 @@ const abi = @import("abi.zig"); const errUnionPayloadOffset = codegen.errUnionPayloadOffset; const errUnionErrorOffset = codegen.errUnionErrorOffset; -const callee_preserved_regs = abi.callee_preserved_regs; -const caller_preserved_regs = abi.caller_preserved_regs; -const c_abi_int_param_regs = abi.c_abi_int_param_regs; -const c_abi_int_return_regs = abi.c_abi_int_return_regs; - const Condition = bits.Condition; const RegisterManager = abi.RegisterManager; const RegisterLock = RegisterManager.RegisterLock; @@ -448,10 +443,11 @@ fn gen(self: *Self) InnerError!void { // Create list of registers to save in the prologue. // TODO handle register classes - var reg_list: Mir.RegisterList(Register, &callee_preserved_regs) = .{}; - inline for (callee_preserved_regs) |reg| { + var reg_list = Mir.RegisterList{}; + const callee_preserved_regs = abi.getCalleePreservedRegs(self.target.*); + for (callee_preserved_regs) |reg| { if (self.register_manager.isRegAllocated(reg)) { - reg_list.push(reg); + reg_list.push(callee_preserved_regs, reg); } } const saved_regs_stack_space: u32 = reg_list.count() * 8; @@ -3923,7 +3919,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
try self.spillEflagsIfOccupied(); - for (caller_preserved_regs) |reg| { + for (abi.getCallerPreservedRegs(self.target.*)) |reg| { try self.register_manager.getReg(reg, null); } @@ -7140,7 +7136,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { assert(ret_ty.isError()); result.return_value = .{ .immediate = 0 }; } else if (ret_ty_size <= 8) { - const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size); + const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); result.return_value = .{ .register = aliased_reg }; } else { // We simply make the return MCValue a stack offset. However, the actual value @@ -7187,7 +7183,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { else => false, }; if (pass_in_reg) { - if (next_int_reg >= c_abi_int_param_regs.len) break; + if (next_int_reg >= abi.getCAbiIntParamRegs(self.target.*).len) break; try by_reg.putNoClobber(i, next_int_reg); next_int_reg += 1; } @@ -7210,7 +7206,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { const param_size = @intCast(u32, ty.abiSize(self.target.*)); const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); if (by_reg.get(i)) |int_reg| { - const aliased_reg = registerAlias(c_abi_int_param_regs[int_reg], param_size); + const aliased_reg = registerAlias(abi.getCAbiIntParamRegs(self.target.*)[int_reg], param_size); result.args[i] = .{ .register = aliased_reg }; next_int_reg += 1; } else { diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index a0f34d5732..66e603aab0 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -283,10 +283,11 @@ fn mirPushPopRegisterList(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerErro const ops = emit.mir.instructions.items(.ops)[inst].decode(); const payload = emit.mir.instructions.items(.data)[inst].payload; const save_reg_list = emit.mir.extraData(Mir.SaveRegisterList, payload).data; - const reg_list = Mir.RegisterList(Register, &abi.callee_preserved_regs).fromInt(save_reg_list.register_list); var disp: i32 = -@intCast(i32, save_reg_list.stack_end); - inline for (abi.callee_preserved_regs) |reg| { - if (reg_list.isSet(reg)) { + const reg_list = Mir.RegisterList.fromInt(save_reg_list.register_list); + const callee_preserved_regs = abi.getCalleePreservedRegs(emit.target.*); + for (callee_preserved_regs) |reg| { + if (reg_list.isSet(callee_preserved_regs, reg)) { switch (tag) { .push => try lowerToMrEnc(.mov, RegisterOrMemory.mem(.qword_ptr, .{ .disp = @bitCast(u32, disp), diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index b2e0f204eb..ca19847042 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -461,46 +461,43 @@ pub const Inst = struct { } }; -pub fn RegisterList(comptime Reg: type, comptime registers: []const Reg) type { - assert(registers.len <= @bitSizeOf(u32)); - return struct { - bitset: RegBitSet = RegBitSet.initEmpty(), - - const RegBitSet = IntegerBitSet(registers.len); - const Self = @This(); - - fn getIndexForReg(reg: Reg) RegBitSet.MaskInt { - inline for (registers) |cpreg, i| { - if (reg.id() == cpreg.id()) return i; - } - unreachable; // register not in input register list! 
- } +pub const RegisterList = struct { + bitset: BitSet = BitSet.initEmpty(), - pub fn push(self: *Self, reg: Reg) void { - const index = getIndexForReg(reg); - self.bitset.set(index); - } + const BitSet = IntegerBitSet(@ctz(@as(u32, 0))); + const Self = @This(); - pub fn isSet(self: Self, reg: Reg) bool { - const index = getIndexForReg(reg); - return self.bitset.isSet(index); + fn getIndexForReg(registers: []const Register, reg: Register) BitSet.MaskInt { + for (registers) |cpreg, i| { + if (reg.id() == cpreg.id()) return @intCast(u32, i); } + unreachable; // register not in input register list! + } - pub fn asInt(self: Self) u32 { - return self.bitset.mask; - } + pub fn push(self: *Self, registers: []const Register, reg: Register) void { + const index = getIndexForReg(registers, reg); + self.bitset.set(index); + } - pub fn fromInt(mask: u32) Self { - return .{ - .bitset = RegBitSet{ .mask = @intCast(RegBitSet.MaskInt, mask) }, - }; - } + pub fn isSet(self: Self, registers: []const Register, reg: Register) bool { + const index = getIndexForReg(registers, reg); + return self.bitset.isSet(index); + } - pub fn count(self: Self) u32 { - return @intCast(u32, self.bitset.count()); - } - }; -} + pub fn asInt(self: Self) u32 { + return self.bitset.mask; + } + + pub fn fromInt(mask: u32) Self { + return .{ + .bitset = BitSet{ .mask = @intCast(BitSet.MaskInt, mask) }, + }; + } + + pub fn count(self: Self) u32 { + return @intCast(u32, self.bitset.count()); + } +}; pub const SaveRegisterList = struct { /// Use `RegisterList` to populate. diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 344fe235f3..2f4a7d1681 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -392,23 +392,69 @@ pub fn classifySystemV(ty: Type, target: Target) [8]Class { } } -/// Note that .rsp and .rbp also belong to this set, however, we never expect to use them -/// for anything else but stack offset tracking therefore we exclude them from this set. -pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 }; -/// These registers need to be preserved (saved on the stack) and restored by the caller before -/// the caller relinquishes control to a subroutine via call instruction (or similar). -/// In other words, these registers are free to use by the callee. -pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; +pub const SysV = struct { + /// Note that .rsp and .rbp also belong to this set, however, we never expect to use them + /// for anything else but stack offset tracking therefore we exclude them from this set. + pub const callee_preserved_regs = [_]Register{ .rbx, .r12, .r13, .r14, .r15 }; + /// These registers need to be preserved (saved on the stack) and restored by the caller before + /// the caller relinquishes control to a subroutine via call instruction (or similar). + /// In other words, these registers are free to use by the callee. 
+ pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 }; -pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; -pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; + pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; + pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; +}; + +pub const Win64 = struct { + /// Note that .rsp and .rbp also belong to this set, however, we never expect to use them + /// for anything else but stack offset tracking therefore we exclude them from this set. + pub const callee_preserved_regs = [_]Register{ .rbx, .rsi, .rdi, .r12, .r13, .r14, .r15 }; + /// These registers need to be preserved (saved on the stack) and restored by the caller before + /// the caller relinquishes control to a subroutine via call instruction (or similar). + /// In other words, these registers are free to use by the callee. + pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .r8, .r9, .r10, .r11 }; + pub const c_abi_int_param_regs = [_]Register{ .rcx, .rdx, .r8, .r9 }; + pub const c_abi_int_return_regs = [_]Register{.rax}; +}; + +pub fn getCalleePreservedRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.callee_preserved_regs, + else => &SysV.callee_preserved_regs, + }; +} + +pub fn getCallerPreservedRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.caller_preserved_regs, + else => &SysV.caller_preserved_regs, + }; +} + +pub fn getCAbiIntParamRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.c_abi_int_param_regs, + else => &SysV.c_abi_int_param_regs, + }; +} + +pub fn getCAbiIntReturnRegs(target: Target) []const Register { + return switch (target.os.tag) { + .windows => &Win64.c_abi_int_return_regs, + else => &SysV.c_abi_int_return_regs, + }; +} + +const gp_regs = [_]Register{ + .rax, .rbx, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15, +}; const sse_avx_regs = [_]Register{ .ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7, .ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15, }; -const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ sse_avx_regs; -pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers); +const allocatable_regs = gp_regs ++ sse_avx_regs; +pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_regs); // Register classes const RegisterBitSet = RegisterManager.RegisterBitSet; @@ -417,15 +463,15 @@ pub const RegisterClass = struct { var set = RegisterBitSet.initEmpty(); set.setRangeValue(.{ .start = 0, - .end = caller_preserved_regs.len + callee_preserved_regs.len, + .end = gp_regs.len, }, true); break :blk set; }; pub const sse: RegisterBitSet = blk: { var set = RegisterBitSet.initEmpty(); set.setRangeValue(.{ - .start = caller_preserved_regs.len + callee_preserved_regs.len, - .end = allocatable_registers.len, + .start = gp_regs.len, + .end = allocatable_regs.len, }, true); break :blk set; }; -- cgit v1.2.3 From 49b1716064cb87b5e8ca13dcb1c9e4fc701737bc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 16:01:40 +0200 Subject: coff: implement lowering unnamed consts --- src/arch/x86_64/CodeGen.zig | 2 +- src/link/Coff.zig | 110 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 103 insertions(+), 9 deletions(-) (limited to 'src') diff 
--git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 5e404d00bd..1805fe7697 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6959,7 +6959,7 @@ fn lowerUnnamedConst(self: *Self, tv: TypedValue) InnerError!MCValue { } else if (self.bin_file.cast(link.File.MachO)) |_| { return MCValue{ .direct_load = local_sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |_| { - return self.fail("TODO lower unnamed const in COFF", .{}); + return MCValue{ .direct_load = local_sym_index }; } else if (self.bin_file.cast(link.File.Plan9)) |_| { return self.fail("TODO lower unnamed const in Plan9", .{}); } else { diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 9f3ccc069c..05ccfb7710 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -862,6 +862,11 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { fn freeAtom(self: *Coff, atom: *Atom) void { log.debug("freeAtom {*}", .{atom}); + // TODO hashmap + for (self.managed_atoms.items) |owned| { + if (owned == atom) break; + } else atom.deinit(self.base.allocator); + const sym = atom.getSymbol(self); const sect_id = @enumToInt(sym.section_number) - 1; const free_list = &self.sections.items(.free_list)[sect_id]; @@ -955,10 +960,67 @@ pub fn updateFunc(self: *Coff, module: *Module, func: *Module.Fn, air: Air, live } pub fn lowerUnnamedConst(self: *Coff, tv: TypedValue, decl_index: Module.Decl.Index) !u32 { - _ = self; - _ = tv; - _ = decl_index; - @panic("TODO lowerUnnamedConst"); + const gpa = self.base.allocator; + var code_buffer = std.ArrayList(u8).init(gpa); + defer code_buffer.deinit(); + + const mod = self.base.options.module.?; + const decl = mod.declPtr(decl_index); + + const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); + if (!gop.found_existing) { + gop.value_ptr.* = .{}; + } + const unnamed_consts = gop.value_ptr; + + const atom = try gpa.create(Atom); + errdefer gpa.destroy(atom); + atom.* = Atom.empty; + + atom.sym_index = try self.allocateSymbol(); + const sym = atom.getSymbolPtr(self); + const sym_name = blk: { + const decl_name = try decl.getFullyQualifiedName(mod); + defer gpa.free(decl_name); + + const index = unnamed_consts.items.len; + break :blk try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index }); + }; + defer gpa.free(sym_name); + try self.setSymbolName(sym, sym_name); + sym.section_number = @intToEnum(coff.SectionNumber, self.rdata_section_index.?); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); + + const res = try codegen.generateSymbol(&self.base, decl.srcLoc(), tv, &code_buffer, .none, .{ + .parent_atom_index = atom.sym_index, + }); + const code = switch (res) { + .externally_managed => |x| x, + .appended => code_buffer.items, + .fail => |em| { + decl.analysis = .codegen_failure; + try mod.failed_decls.put(mod.gpa, decl_index, em); + log.err("{s}", .{em.msg}); + return error.AnalysisFail; + }, + }; + + const required_alignment = tv.ty.abiAlignment(self.base.options.target); + atom.alignment = required_alignment; + atom.size = @intCast(u32, code.len); + sym.value = try self.allocateAtom(atom, atom.size, atom.alignment); + errdefer self.freeAtom(atom); + + try unnamed_consts.append(gpa, atom); + + log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, sym.value }); + log.debug(" (required alignment 0x{x})", .{required_alignment}); + + try self.writeAtom(atom, code); + + return atom.sym_index; } pub fn updateDecl(self: *Coff, module: *Module, decl_index: 
Module.Decl.Index) !void { @@ -1097,6 +1159,20 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, try self.writeAtom(atom, code); } +fn freeUnnamedConsts(self: *Coff, decl_index: Module.Decl.Index) void { + const gpa = self.base.allocator; + const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; + for (unnamed_consts.items) |atom| { + self.freeAtom(atom); + self.locals_free_list.append(gpa, atom.sym_index) catch {}; + self.locals.items[atom.sym_index].section_number = .UNDEFINED; + _ = self.atom_by_index_table.remove(atom.sym_index); + log.debug(" adding local symbol index {d} to free list", .{atom.sym_index}); + atom.sym_index = 0; + } + unnamed_consts.clearAndFree(gpa); +} + pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { if (build_options.have_llvm) { if (self.llvm_object) |llvm_object| return llvm_object.freeDecl(decl_index); @@ -1110,6 +1186,7 @@ pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { const kv = self.decls.fetchRemove(decl_index); if (kv.?.value) |_| { self.freeAtom(&decl.link.coff); + self.freeUnnamedConsts(decl_index); } // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. @@ -1372,10 +1449,27 @@ pub fn getDeclVAddr( decl_index: Module.Decl.Index, reloc_info: link.File.RelocInfo, ) !u64 { - _ = self; - _ = decl_index; - _ = reloc_info; - @panic("TODO getDeclVAddr"); + const mod = self.base.options.module.?; + const decl = mod.declPtr(decl_index); + + assert(self.llvm_object == null); + assert(decl.link.coff.sym_index != 0); + + const atom = self.atom_by_index_table.get(reloc_info.parent_atom_index).?; + const target = SymbolWithLoc{ .sym_index = decl.link.coff.sym_index, .file = null }; + const target_sym = self.getSymbol(target); + try atom.addRelocation(self, .{ + .@"type" = .direct, + .target = target, + .offset = @intCast(u32, reloc_info.offset), + .addend = reloc_info.addend, + .pcrel = false, + .length = 3, + .prev_vaddr = target_sym.value, + }); + try atom.addBaseRelocation(self, @intCast(u32, reloc_info.offset)); + + return 0; } pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { -- cgit v1.2.3 From 38573fed0ba796f642e7b4eebdf3f9eafc572f25 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 16:35:09 +0200 Subject: coff: fix runtime traps --- src/arch/x86_64/CodeGen.zig | 7 ++++++- src/arch/x86_64/abi.zig | 2 +- src/link/Coff.zig | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 1805fe7697..6c2db1f25e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4204,6 +4204,7 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { }, .stack_offset => { const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv); + log.warn("REG = {}", .{reg}); const reg_lock = self.register_manager.lockRegAssumeUnused(reg); defer self.register_manager.unlockReg(reg_lock); @@ -5854,7 +5855,11 @@ fn genInlineMemcpy( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO preserve contents of .rax and .rcx if not free, and then restore + // TODO: Preserve contents of .rax and .rcx if not free and locked, and then restore + // How can we do this without context if the value inside .rax or .rcx we preserve contains + // value needed to perform the memcpy in the first place? 
+ // I think we should have an accumulator-based context that we pass with each subsequent helper + // call until we resolve the entire instruction. try self.register_manager.getReg(.rax, null); try self.register_manager.getReg(.rcx, null); diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 2f4a7d1681..298fc6656f 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -447,7 +447,7 @@ pub fn getCAbiIntReturnRegs(target: Target) []const Register { } const gp_regs = [_]Register{ - .rax, .rbx, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15, + .rbx, .r12, .r13, .r14, .r15, .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11, }; const sse_avx_regs = [_]Register{ .ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7, diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 05ccfb7710..b5670ce5a1 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -1053,7 +1053,7 @@ pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) ! .ty = decl.ty, .val = decl_val, }, &code_buffer, .none, .{ - .parent_atom_index = 0, + .parent_atom_index = decl.link.coff.sym_index, }); const code = switch (res) { .externally_managed => |x| x, -- cgit v1.2.3 From 3a4c69c01824fb6f72e90433a5683a8df09ad4c1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Sep 2022 19:04:08 +0200 Subject: x86_64: implement Windows x64 calling convention --- lib/std/os/windows/kernel32.zig | 8 ++- src/arch/x86_64/CodeGen.zig | 130 ++++++++++++++++++++++------------------ 2 files changed, 79 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index 9e6f5df97b..8d146def7f 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -348,7 +348,13 @@ pub extern "kernel32" fn WriteFile( in_out_lpOverlapped: ?*OVERLAPPED, ) callconv(WINAPI) BOOL; -pub extern "kernel32" fn WriteFileEx(hFile: HANDLE, lpBuffer: [*]const u8, nNumberOfBytesToWrite: DWORD, lpOverlapped: *OVERLAPPED, lpCompletionRoutine: LPOVERLAPPED_COMPLETION_ROUTINE) callconv(WINAPI) BOOL; +pub extern "kernel32" fn WriteFileEx( + hFile: HANDLE, + lpBuffer: [*]const u8, + nNumberOfBytesToWrite: DWORD, + lpOverlapped: *OVERLAPPED, + lpCompletionRoutine: LPOVERLAPPED_COMPLETION_ROUTINE, +) callconv(WINAPI) BOOL; pub extern "kernel32" fn LoadLibraryW(lpLibFileName: [*:0]const u16) callconv(WINAPI) ?HMODULE; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 6c2db1f25e..15e14bcbb8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4204,7 +4204,6 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { }, .stack_offset => { const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv); - log.warn("REG = {}", .{reg}); const reg_lock = self.register_manager.lockRegAssumeUnused(reg); defer self.register_manager.unlockReg(reg_lock); @@ -7129,11 +7128,12 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { result.stack_align = 1; return result; }, - .Unspecified, .C => { + .C => { // Return values if (ret_ty.zigTypeTag() == .NoReturn) { result.return_value = .{ .unreach = {} }; } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + // TODO: is this even possible for C calling convention? 
result.return_value = .{ .none = {} }; } else { const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); @@ -7144,81 +7144,95 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); result.return_value = .{ .register = aliased_reg }; } else { - // We simply make the return MCValue a stack offset. However, the actual value - // for the offset will be populated later. We will also push the stack offset - // value into .rdi register when we resolve the offset. + // TODO: return argument cell should go first result.return_value = .{ .stack_offset = 0 }; } } // Input params - // First, split into args that can be passed via registers. - // This will make it easier to then push the rest of args in reverse - // order on the stack. - var next_int_reg: usize = 0; - var by_reg = std.AutoHashMap(usize, usize).init(self.bin_file.allocator); - defer by_reg.deinit(); - - // If we want debug output, we store all args on stack for better liveness of args - // in debugging contexts such as previewing the args in the debugger anywhere in - // the procedure. Passing the args via registers can lead to reusing the register - // for local ops thus clobbering the input arg forever. - // This of course excludes C ABI calls. - const omit_args_in_registers = blk: { - if (cc == .C) break :blk false; - switch (self.bin_file.options.optimize_mode) { - .Debug => break :blk true, - else => break :blk false, - } + var next_stack_offset: u32 = switch (result.return_value) { + .stack_offset => |off| @intCast(u32, off), + else => 0, }; - if (!omit_args_in_registers) { - for (param_types) |ty, i| { - if (!ty.hasRuntimeBits()) continue; - const param_size = @intCast(u32, ty.abiSize(self.target.*)); - // For simplicity of codegen, slices and other types are always pushed onto the stack. - // TODO: look into optimizing this by passing things as registers sometimes, - // such as ptr and len of slices as separate registers. - // TODO: also we need to honor the C ABI for relevant types rather than passing on - // the stack here. - const pass_in_reg = switch (ty.zigTypeTag()) { - .Bool => true, - .Int, .Enum => param_size <= 8, - .Pointer => ty.ptrSize() != .Slice, - .Optional => ty.isPtrLikeOptional(), - else => false, - }; - if (pass_in_reg) { - if (next_int_reg >= abi.getCAbiIntParamRegs(self.target.*).len) break; - try by_reg.putNoClobber(i, next_int_reg); - next_int_reg += 1; - } + + for (param_types) |ty, i| { + assert(ty.hasRuntimeBits()); + + if (self.target.os.tag != .windows) { + return self.fail("TODO SysV calling convention", .{}); + } + + switch (abi.classifyWindows(ty, self.target.*)) { + .integer => blk: { + if (i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; // fallthrough + result.args[i] = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[i] }; + continue; + }, + .sse => return self.fail("TODO float/vector via SSE on Windows", .{}), + .memory => {}, // fallthrough + else => unreachable, + } + + const param_size = @intCast(u32, ty.abiSize(self.target.*)); + const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); + const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); + result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; + next_stack_offset = offset; + } + // Align the stack to 16bytes before allocating shadow stack space. 
+ const aligned_next_stack_offset = mem.alignForwardGeneric(u32, next_stack_offset, 16); + const padding = aligned_next_stack_offset - next_stack_offset; + if (padding > 0) { + for (result.args) |*arg| { + if (arg.isRegister()) continue; + arg.stack_offset += @intCast(i32, padding); + } + } + + // TODO fix this so that the 16byte alignment padding is at the current value of $rsp, and push + // the args onto the stack so that there is no padding between the first argument and + // the standard preamble. + // alignment padding | ret value (if > 8) | args ... | shadow stack space | $rbp | + result.stack_byte_count = aligned_next_stack_offset + 4 * @sizeOf(u64); + result.stack_align = 16; + }, + .Unspecified => { + // Return values + if (ret_ty.zigTypeTag() == .NoReturn) { + result.return_value = .{ .unreach = {} }; + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + result.return_value = .{ .none = {} }; + } else { + const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + if (ret_ty_size == 0) { + assert(ret_ty.isError()); + result.return_value = .{ .immediate = 0 }; + } else if (ret_ty_size <= 8) { + result.return_value = .{ .register = .rdi }; + } else { + // We simply make the return MCValue a stack offset. However, the actual value + // for the offset will be populated later. We will also push the stack offset + // value into .rdi register when we resolve the offset. + result.return_value = .{ .stack_offset = 0 }; } } + // Input params var next_stack_offset: u32 = switch (result.return_value) { .stack_offset => |off| @intCast(u32, off), else => 0, }; - var count: usize = param_types.len; - while (count > 0) : (count -= 1) { - const i = count - 1; - const ty = param_types[i]; + + for (param_types) |ty, i| { if (!ty.hasRuntimeBits()) { - assert(cc != .C); result.args[i] = .{ .none = {} }; continue; } const param_size = @intCast(u32, ty.abiSize(self.target.*)); const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); - if (by_reg.get(i)) |int_reg| { - const aliased_reg = registerAlias(abi.getCAbiIntParamRegs(self.target.*)[int_reg], param_size); - result.args[i] = .{ .register = aliased_reg }; - next_int_reg += 1; - } else { - const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); - result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; - next_stack_offset = offset; - } + const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); + result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; + next_stack_offset = offset; } result.stack_align = 16; -- cgit v1.2.3 From c0e288c78248870bf9881b25ff8354c67753bbe2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Sep 2022 17:09:07 +0200 Subject: x86_64: implement canonicalising branches in switch expression --- src/arch/x86_64/CodeGen.zig | 192 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 167 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 15e14bcbb8..4a7efda02f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -200,6 +200,34 @@ const Branch = struct { self.inst_table.deinit(gpa); self.* = undefined; } + + const FormatContext = struct { + insts: []const Air.Inst.Index, + mcvs: []const MCValue, + }; + + fn fmt( + ctx: FormatContext, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + _ = options; + comptime 
assert(unused_format_string.len == 0); + try writer.writeAll("Branch {\n"); + for (ctx.insts) |inst, i| { + const mcv = ctx.mcvs[i]; + try writer.print(" %{d} => {}\n", .{ inst, mcv }); + } + try writer.writeAll("}"); + } + + fn fmtDebug(self: @This()) std.fmt.Formatter(fmt) { + return .{ .data = .{ + .insts = self.inst_table.keys(), + .mcvs = self.inst_table.values(), + } }; + } }; const StackAllocation = struct { @@ -232,7 +260,7 @@ const BigTomb = struct { fn finishAir(bt: *BigTomb, result: MCValue) void { const is_used = !bt.function.liveness.isUnused(bt.inst); if (is_used) { - log.debug("%{d} => {}", .{ bt.inst, result }); + log.debug(" (saving %{d} => {})", .{ bt.inst, result }); const branch = &bt.function.branch_stack.items[bt.function.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(bt.inst, result); } @@ -795,6 +823,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { fn processDeath(self: *Self, inst: Air.Inst.Index) void { const air_tags = self.air.instructions.items(.tag); if (air_tags[inst] == .constant) return; // Constants are immortal. + log.debug(" (processing death of %{d})", .{inst}); // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -822,8 +851,10 @@ fn finishAirBookkeeping(self: *Self) void { } fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void { + log.debug("finishAir: %{d}, {}, {any}", .{ inst, result, operands }); var tomb_bits = self.liveness.getTombBits(inst); for (operands) |op| { + log.debug(" (processing {})", .{op}); const dies = @truncate(u1, tomb_bits) != 0; tomb_bits >>= 1; if (!dies) continue; @@ -834,7 +865,7 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live } const is_used = @truncate(u1, tomb_bits) == 0; if (is_used) { - log.debug("%{d} => {}", .{ inst, result }); + log.debug(" (saving %{d} => {})", .{ inst, result }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(inst, result); @@ -4647,6 +4678,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { const reloc = try self.genCondBrMir(cond_ty, cond); + log.debug("airCondBr: %{d}", .{inst}); + // If the condition dies here in this condbr instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches @@ -4674,15 +4707,17 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // Revert to the previous register and stack allocation state. - var saved_then_branch = self.branch_stack.pop(); - defer saved_then_branch.deinit(self.gpa); + var then_branch = self.branch_stack.pop(); + defer then_branch.deinit(self.gpa); self.revertState(saved_state); try self.performReloc(reloc); - const else_branch = self.branch_stack.addOneAssumeCapacity(); - else_branch.* = .{}; + try self.branch_stack.append(.{}); + errdefer { + _ = self.branch_stack.pop(); + } try self.ensureProcessDeathCapacity(liveness_condbr.else_deaths.len); for (liveness_condbr.else_deaths) |operand| { @@ -4690,6 +4725,9 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { } try self.genBody(else_body); + var else_branch = self.branch_stack.pop(); + defer else_branch.deinit(self.gpa); + // At this point, each branch will possibly have conflicting values for where // each instruction is stored. 
They agree, however, on which instructions are alive/dead. // We use the first ("then") branch as canonical, and here emit @@ -4698,15 +4736,23 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // that we can use all the code emitting abstractions. This is why at the bottom we // assert that parent_branch.free_registers equals the saved_then_branch.free_registers // rather than assigning it. - const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 2]; + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, else_branch.inst_table.count()); + log.debug("Upper branches:", .{}); + for (self.branch_stack.items) |bs| { + log.debug("{}", .{bs.fmtDebug()}); + } + + log.debug("Then branch: {}", .{then_branch.fmtDebug()}); + log.debug("Else branch: {}", .{else_branch.fmtDebug()}); + const else_slice = else_branch.inst_table.entries.slice(); const else_keys = else_slice.items(.key); const else_values = else_slice.items(.value); for (else_keys) |else_key, else_idx| { const else_value = else_values[else_idx]; - const canon_mcv = if (saved_then_branch.inst_table.fetchSwapRemove(else_key)) |then_entry| blk: { + const canon_mcv = if (then_branch.inst_table.fetchSwapRemove(else_key)) |then_entry| blk: { // The instruction's MCValue is overridden in both branches. parent_branch.inst_table.putAssumeCapacity(else_key, then_entry.value); if (else_value == .dead) { @@ -4718,7 +4764,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { if (else_value == .dead) continue; // The instruction is only overridden in the else branch. - var i: usize = self.branch_stack.items.len - 2; + var i: usize = self.branch_stack.items.len - 1; while (true) { i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? 
if (self.branch_stack.items[i].inst_table.get(else_key)) |mcv| { @@ -4733,8 +4779,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value); // TODO track the new register / stack allocation } - try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, saved_then_branch.inst_table.count()); - const then_slice = saved_then_branch.inst_table.entries.slice(); + try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, then_branch.inst_table.count()); + const then_slice = then_branch.inst_table.entries.slice(); const then_keys = then_slice.items(.key); const then_values = then_slice.items(.value); for (then_keys) |then_key, then_idx| { @@ -4746,7 +4792,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { if (then_value == .dead) continue; const parent_mcv = blk: { - var i: usize = self.branch_stack.items.len - 2; + log.debug("{d}", .{self.branch_stack.items.len}); + var i: usize = self.branch_stack.items.len - 1; while (true) { i -= 1; if (self.branch_stack.items[i].inst_table.get(then_key)) |mcv| { @@ -4762,11 +4809,6 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // TODO track the new register / stack allocation } - { - var item = self.branch_stack.pop(); - item.deinit(self.gpa); - } - // We already took care of pl_op.operand earlier, so we're going // to pass .none here return self.finishAir(inst, .unreach, .{ .none, .none, .none }); @@ -5139,6 +5181,8 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { ); defer self.gpa.free(liveness.deaths); + log.debug("airSwitch: %{d}", .{inst}); + // If the condition dies here in this switch instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches @@ -5150,6 +5194,15 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { } } + var branch_stack = std.ArrayList(Branch).init(self.gpa); + defer { + for (branch_stack.items) |*bs| { + bs.deinit(self.gpa); + } + branch_stack.deinit(); + } + try branch_stack.ensureTotalCapacityPrecise(switch_br.data.cases_len + 1); + while (case_i < switch_br.data.cases_len) : (case_i += 1) { const case = self.air.extraData(Air.SwitchBr.Case, extra_index); const items = @ptrCast([]const Air.Inst.Ref, self.air.extra[case.end..][0..case.data.items_len]); @@ -5179,10 +5232,9 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { try self.genBody(case_body); - // Revert to the previous register and stack allocation state. - var saved_case_branch = self.branch_stack.pop(); - defer saved_case_branch.deinit(self.gpa); + branch_stack.appendAssumeCapacity(self.branch_stack.pop()); + // Revert to the previous register and stack allocation state. self.revertState(saved_state); for (relocs) |reloc| { @@ -5192,10 +5244,13 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { if (switch_br.data.else_body_len > 0) { const else_body = self.air.extra[extra_index..][0..switch_br.data.else_body_len]; + + // Capture the state of register and stack allocation state so that we can revert to it. + const saved_state = try self.captureState(); + try self.branch_stack.append(.{}); - defer { - var item = self.branch_stack.pop(); - item.deinit(self.gpa); + errdefer { + _ = self.branch_stack.pop(); } const else_deaths = liveness.deaths.len - 1; @@ -5206,8 +5261,29 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { try self.genBody(else_body); - // TODO consolidate returned MCValues between prongs and else branch like we do - // in airCondBr. 
+ branch_stack.appendAssumeCapacity(self.branch_stack.pop()); + + // Revert to the previous register and stack allocation state. + self.revertState(saved_state); + } + + // Consolidate returned MCValues between prongs and else branch like we do + // in airCondBr. + log.debug("Upper branches:", .{}); + for (self.branch_stack.items) |bs| { + log.debug("{}", .{bs.fmtDebug()}); + } + for (branch_stack.items) |bs, i| { + log.debug("Case-{d} branch: {}", .{ i, bs.fmtDebug() }); + } + + // TODO: can we reduce the complexity of this algorithm? + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + var i: usize = branch_stack.items.len; + while (i > 1) : (i -= 1) { + const canon_branch = &branch_stack.items[i - 2]; + const target_branch = &branch_stack.items[i - 1]; + try self.canonicaliseBranches(parent_branch, canon_branch, target_branch); } // We already took care of pl_op.operand earlier, so we're going @@ -5215,6 +5291,72 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, .unreach, .{ .none, .none, .none }); } +fn canonicaliseBranches(self: *Self, parent_branch: *Branch, canon_branch: *Branch, target_branch: *Branch) !void { + try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, target_branch.inst_table.count()); + + const target_slice = target_branch.inst_table.entries.slice(); + const target_keys = target_slice.items(.key); + const target_values = target_slice.items(.value); + + for (target_keys) |target_key, target_idx| { + const target_value = target_values[target_idx]; + const canon_mcv = if (canon_branch.inst_table.fetchSwapRemove(target_key)) |canon_entry| blk: { + // The instruction's MCValue is overridden in both branches. + parent_branch.inst_table.putAssumeCapacity(target_key, canon_entry.value); + if (target_value == .dead) { + assert(canon_entry.value == .dead); + continue; + } + break :blk canon_entry.value; + } else blk: { + if (target_value == .dead) + continue; + // The instruction is only overridden in the else branch. + var i: usize = self.branch_stack.items.len - 1; + while (true) { + i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? + if (self.branch_stack.items[i].inst_table.get(target_key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating target_entry {d} {}=>{}", .{ target_key, target_value, canon_mcv }); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(self.air.typeOfIndex(target_key), canon_mcv, target_value); + // TODO track the new register / stack allocation + } + try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, canon_branch.inst_table.count()); + const canon_slice = canon_branch.inst_table.entries.slice(); + const canon_keys = canon_slice.items(.key); + const canon_values = canon_slice.items(.value); + for (canon_keys) |canon_key, canon_idx| { + const canon_value = canon_values[canon_idx]; + // We already deleted the items from this table that matched the target_branch. + // So these are all instructions that are only overridden in the canon branch. 
+ parent_branch.inst_table.putAssumeCapacity(canon_key, canon_value); + log.debug("canon_value = {}", .{canon_value}); + if (canon_value == .dead) + continue; + const parent_mcv = blk: { + var i: usize = self.branch_stack.items.len - 1; + while (true) { + i -= 1; + if (self.branch_stack.items[i].inst_table.get(canon_key)) |mcv| { + assert(mcv != .dead); + break :blk mcv; + } + } + }; + log.debug("consolidating canon_entry {d} {}=>{}", .{ canon_key, parent_mcv, canon_value }); + // TODO make sure the destination stack offset / register does not already have something + // going on there. + try self.setRegOrMem(self.air.typeOfIndex(canon_key), parent_mcv, canon_value); + // TODO track the new register / stack allocation + } +} + fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void { const next_inst = @intCast(u32, self.mir_instructions.len); switch (self.mir_instructions.items(.tag)[reloc]) { -- cgit v1.2.3 From b9c31a8703fdd8297673aa65a1bdefd56cd13b77 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Sep 2022 17:19:14 +0200 Subject: x86_64: refactor cond_br with canonicaliseBranches helper --- src/arch/x86_64/CodeGen.zig | 66 ++------------------------------------------- 1 file changed, 2 insertions(+), 64 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 4a7efda02f..1164df4b19 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4736,9 +4736,6 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // that we can use all the code emitting abstractions. This is why at the bottom we // assert that parent_branch.free_registers equals the saved_then_branch.free_registers // rather than assigning it. - const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, else_branch.inst_table.count()); - log.debug("Upper branches:", .{}); for (self.branch_stack.items) |bs| { log.debug("{}", .{bs.fmtDebug()}); @@ -4747,67 +4744,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { log.debug("Then branch: {}", .{then_branch.fmtDebug()}); log.debug("Else branch: {}", .{else_branch.fmtDebug()}); - const else_slice = else_branch.inst_table.entries.slice(); - const else_keys = else_slice.items(.key); - const else_values = else_slice.items(.value); - for (else_keys) |else_key, else_idx| { - const else_value = else_values[else_idx]; - const canon_mcv = if (then_branch.inst_table.fetchSwapRemove(else_key)) |then_entry| blk: { - // The instruction's MCValue is overridden in both branches. - parent_branch.inst_table.putAssumeCapacity(else_key, then_entry.value); - if (else_value == .dead) { - assert(then_entry.value == .dead); - continue; - } - break :blk then_entry.value; - } else blk: { - if (else_value == .dead) - continue; - // The instruction is only overridden in the else branch. - var i: usize = self.branch_stack.items.len - 1; - while (true) { - i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead? - if (self.branch_stack.items[i].inst_table.get(else_key)) |mcv| { - assert(mcv != .dead); - break :blk mcv; - } - } - }; - log.debug("consolidating else_entry {d} {}=>{}", .{ else_key, else_value, canon_mcv }); - // TODO make sure the destination stack offset / register does not already have something - // going on there. 
- try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value); - // TODO track the new register / stack allocation - } - try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, then_branch.inst_table.count()); - const then_slice = then_branch.inst_table.entries.slice(); - const then_keys = then_slice.items(.key); - const then_values = then_slice.items(.value); - for (then_keys) |then_key, then_idx| { - const then_value = then_values[then_idx]; - // We already deleted the items from this table that matched the else_branch. - // So these are all instructions that are only overridden in the then branch. - parent_branch.inst_table.putAssumeCapacity(then_key, then_value); - log.debug("then_value = {}", .{then_value}); - if (then_value == .dead) - continue; - const parent_mcv = blk: { - log.debug("{d}", .{self.branch_stack.items.len}); - var i: usize = self.branch_stack.items.len - 1; - while (true) { - i -= 1; - if (self.branch_stack.items[i].inst_table.get(then_key)) |mcv| { - assert(mcv != .dead); - break :blk mcv; - } - } - }; - log.debug("consolidating then_entry {d} {}=>{}", .{ then_key, parent_mcv, then_value }); - // TODO make sure the destination stack offset / register does not already have something - // going on there. - try self.setRegOrMem(self.air.typeOfIndex(then_key), parent_mcv, then_value); - // TODO track the new register / stack allocation - } + const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try self.canonicaliseBranches(parent_branch, &then_branch, &else_branch); // We already took care of pl_op.operand earlier, so we're going // to pass .none here -- cgit v1.2.3 From 1d57b347e9aeddc2de33b1b77b331d36e4900425 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Sep 2022 20:49:37 +0200 Subject: x86_64: clean up logging --- src/arch/x86_64/CodeGen.zig | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 1164df4b19..bb6191af06 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -222,6 +222,14 @@ const Branch = struct { try writer.writeAll("}"); } + fn format(branch: Branch, comptime unused_format_string: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = branch; + _ = unused_format_string; + _ = options; + _ = writer; + @compileError("do not format Branch directly; use ty.fmtDebug()"); + } + fn fmtDebug(self: @This()) std.fmt.Formatter(fmt) { return .{ .data = .{ .insts = self.inst_table.keys(), @@ -823,7 +831,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { fn processDeath(self: *Self, inst: Air.Inst.Index) void { const air_tags = self.air.instructions.items(.tag); if (air_tags[inst] == .constant) return; // Constants are immortal. - log.debug(" (processing death of %{d})", .{inst}); + log.debug("%{d} => {}", .{ inst, MCValue.dead }); // When editing this function, note that the logic must synchronize with `reuseOperand`. 
const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -851,10 +859,8 @@ fn finishAirBookkeeping(self: *Self) void { } fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void { - log.debug("finishAir: %{d}, {}, {any}", .{ inst, result, operands }); var tomb_bits = self.liveness.getTombBits(inst); for (operands) |op| { - log.debug(" (processing {})", .{op}); const dies = @truncate(u1, tomb_bits) != 0; tomb_bits >>= 1; if (!dies) continue; @@ -865,7 +871,7 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live } const is_used = @truncate(u1, tomb_bits) == 0; if (is_used) { - log.debug(" (saving %{d} => {})", .{ inst, result }); + log.debug("%{d} => {}", .{ inst, result }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(inst, result); @@ -4678,8 +4684,6 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { const reloc = try self.genCondBrMir(cond_ty, cond); - log.debug("airCondBr: %{d}", .{inst}); - // If the condition dies here in this condbr instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches @@ -4736,6 +4740,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // that we can use all the code emitting abstractions. This is why at the bottom we // assert that parent_branch.free_registers equals the saved_then_branch.free_registers // rather than assigning it. + log.debug("airCondBr: %{d}", .{inst}); log.debug("Upper branches:", .{}); for (self.branch_stack.items) |bs| { log.debug("{}", .{bs.fmtDebug()}); @@ -5119,8 +5124,6 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { ); defer self.gpa.free(liveness.deaths); - log.debug("airSwitch: %{d}", .{inst}); - // If the condition dies here in this switch instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches @@ -5207,6 +5210,7 @@ fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { // Consolidate returned MCValues between prongs and else branch like we do // in airCondBr. 
+ log.debug("airSwitch: %{d}", .{inst}); log.debug("Upper branches:", .{}); for (self.branch_stack.items) |bs| { log.debug("{}", .{bs.fmtDebug()}); -- cgit v1.2.3 From 28f525baa4ff0480043a16dd2467f231c8d6526a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Sep 2022 21:51:49 +0200 Subject: x86_64: handle ptr_stack_offset for blocks --- src/arch/x86_64/CodeGen.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index bb6191af06..63f6b48d1e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5328,7 +5328,7 @@ fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { block_data.mcv = switch (operand_mcv) { .none, .dead, .unreach => unreachable, .register, .stack_offset, .memory => operand_mcv, - .eflags, .immediate => blk: { + .eflags, .immediate, .ptr_stack_offset => blk: { const new_mcv = try self.allocRegOrMem(block, true); try self.setRegOrMem(self.air.typeOfIndex(block), new_mcv, operand_mcv); break :blk new_mcv; -- cgit v1.2.3 From 619d82234ecbb8a130e827e9de6f545288649a58 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Sep 2022 15:12:30 +0200 Subject: x86_64: clean up return registers for unspecified cc --- src/arch/x86_64/CodeGen.zig | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 63f6b48d1e..0269a44be8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -439,16 +439,17 @@ fn gen(self: *Self) InnerError!void { }); if (self.ret_mcv == .stack_offset) { - // The address where to store the return value for the caller is in `.rdi` + // The address where to store the return value for the caller is in a // register which the callee is free to clobber. Therefore, we purposely // spill it to stack immediately. const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + 8, 8); self.next_stack_offset = stack_offset; self.max_end_stack = @maximum(self.max_end_stack, self.next_stack_offset); - try self.genSetStack(Type.usize, @intCast(i32, stack_offset), MCValue{ .register = .rdi }, .{}); + const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0]; + try self.genSetStack(Type.usize, @intCast(i32, stack_offset), MCValue{ .register = ret_reg }, .{}); self.ret_mcv = MCValue{ .stack_offset = @intCast(i32, stack_offset) }; - log.debug("gen: spilling .rdi to stack at offset {}", .{stack_offset}); + log.debug("gen: spilling {s} to stack at offset {}", .{ @tagName(ret_reg), stack_offset }); } _ = try self.addInst(.{ @@ -831,7 +832,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { fn processDeath(self: *Self, inst: Air.Inst.Index) void { const air_tags = self.air.instructions.items(.tag); if (air_tags[inst] == .constant) return; // Constants are immortal. - log.debug("%{d} => {}", .{ inst, MCValue.dead }); + log.debug("%{d} => {}", .{ inst, MCValue{ .dead = {} } }); // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -3960,7 +3961,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
try self.register_manager.getReg(reg, null); } - const rdi_lock: ?RegisterLock = blk: { + const ret_reg_lock: ?RegisterLock = blk: { if (info.return_value == .stack_offset) { const ret_ty = fn_ty.fnReturnType(); const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); @@ -3968,17 +3969,18 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const stack_offset = @intCast(i32, try self.allocMem(inst, ret_abi_size, ret_abi_align)); log.debug("airCall: return value on stack at offset {}", .{stack_offset}); - try self.register_manager.getReg(.rdi, null); - try self.genSetReg(Type.usize, .rdi, .{ .ptr_stack_offset = stack_offset }); - const rdi_lock = self.register_manager.lockRegAssumeUnused(.rdi); + const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0]; + try self.register_manager.getReg(ret_reg, null); + try self.genSetReg(Type.usize, ret_reg, .{ .ptr_stack_offset = stack_offset }); + const ret_reg_lock = self.register_manager.lockRegAssumeUnused(ret_reg); info.return_value.stack_offset = stack_offset; - break :blk rdi_lock; + break :blk ret_reg_lock; } break :blk null; }; - defer if (rdi_lock) |lock| self.register_manager.unlockReg(lock); + defer if (ret_reg_lock) |lock| self.register_manager.unlockReg(lock); for (args) |arg, arg_i| { const mc_arg = info.args[arg_i]; @@ -7292,11 +7294,12 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { assert(ret_ty.isError()); result.return_value = .{ .immediate = 0 }; } else if (ret_ty_size <= 8) { - result.return_value = .{ .register = .rdi }; + const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); + result.return_value = .{ .register = aliased_reg }; } else { // We simply make the return MCValue a stack offset. However, the actual value // for the offset will be populated later. We will also push the stack offset - // value into .rdi register when we resolve the offset. + // value into an appropriate register when we resolve the offset. result.return_value = .{ .stack_offset = 0 }; } } -- cgit v1.2.3 From e0167ae0e3f0e6c6a667b372b21c322d47ccd92d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Sep 2022 20:36:01 +0200 Subject: x86_64: allow for any index register in complex SIB encodings This relieves register pressure, and reduce generated code size (since now we can use the same index register for both `mov_scale_src` and `mov_scale_dst` MIR instructions). Fix lowering of ModRM + SIB encodings where index register is extended - previously, we would carelessly ignore the fact generating incorrect encodings. --- src/arch/x86_64/CodeGen.zig | 104 ++++++++++++++++---------------------------- src/arch/x86_64/Emit.zig | 69 ++++++++++++++++------------- src/arch/x86_64/Mir.zig | 87 +++++++++++++++++++++++++++++++----- 3 files changed, 154 insertions(+), 106 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 0269a44be8..b03acfb848 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5932,7 +5932,6 @@ const InlineMemcpyOpts = struct { dest_stack_base: ?Register = null, }; -/// Spills .rax and .rcx. 
fn genInlineMemcpy( self: *Self, dst_ptr: MCValue, @@ -5940,19 +5939,6 @@ fn genInlineMemcpy( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO: Preserve contents of .rax and .rcx if not free and locked, and then restore - // How can we do this without context if the value inside .rax or .rcx we preserve contains - // value needed to perform the memcpy in the first place? - // I think we should have an accumulator-based context that we pass with each subsequent helper - // call until we resolve the entire instruction. - try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rcx, null); - - const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rcx }); - defer for (reg_locks) |lock| { - self.register_manager.unlockReg(lock); - }; - const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg| self.register_manager.lockReg(reg) else @@ -5965,7 +5951,13 @@ fn genInlineMemcpy( null; defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); - const dst_addr_reg = try self.register_manager.allocReg(null, gp); + const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); + const dst_addr_reg = regs[0]; + const src_addr_reg = regs[1]; + const index_reg = regs[2].to64(); + const count_reg = regs[3].to64(); + const tmp_reg = regs[4].to8(); + switch (dst_ptr) { .memory, .got_load, @@ -5998,10 +5990,7 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - const dst_addr_reg_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg); - defer self.register_manager.unlockReg(dst_addr_reg_lock); - const src_addr_reg = try self.register_manager.allocReg(null, gp); switch (src_ptr) { .memory, .got_load, @@ -6034,26 +6023,13 @@ fn genInlineMemcpy( return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr}); }, } - const src_addr_reg_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg); - defer self.register_manager.unlockReg(src_addr_reg_lock); - - const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); - const count_reg = regs[0].to64(); - const tmp_reg = regs[1].to8(); try self.genSetReg(Type.usize, count_reg, len); - // mov rcx, 0 - _ = try self.addInst(.{ - .tag = .mov, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), - .data = .{ .imm = 0 }, - }); - - // mov rax, 0 + // mov index_reg, 0 _ = try self.addInst(.{ .tag = .mov, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 0 }, }); @@ -6075,37 +6051,30 @@ fn genInlineMemcpy( } }, }); - // mov tmp, [addr + rcx] + // mov tmp, [addr + index_reg] _ = try self.addInst(.{ .tag = .mov_scale_src, .ops = Mir.Inst.Ops.encode(.{ .reg1 = tmp_reg.to8(), .reg2 = src_addr_reg, }), - .data = .{ .imm = 0 }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) }, }); - // mov [stack_offset + rax], tmp + // mov [stack_offset + index_reg], tmp _ = try self.addInst(.{ .tag = .mov_scale_dst, .ops = Mir.Inst.Ops.encode(.{ .reg1 = dst_addr_reg, .reg2 = tmp_reg.to8(), }), - .data = .{ .imm = 0 }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDisp.encode(index_reg, 0)) }, }); - // add rcx, 1 + // add index_reg, 1 _ = try self.addInst(.{ .tag = .add, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rcx }), - .data = .{ .imm = 1 }, - }); - - // add rax, 1 - _ = try self.addInst(.{ - .tag = .add, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax 
}), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 1 }, }); @@ -6127,7 +6096,6 @@ fn genInlineMemcpy( try self.performReloc(loop_reloc); } -/// Spills .rax register. fn genInlineMemset( self: *Self, dst_ptr: MCValue, @@ -6135,12 +6103,22 @@ fn genInlineMemset( len: MCValue, opts: InlineMemcpyOpts, ) InnerError!void { - // TODO preserve contents of .rax and then restore - try self.register_manager.getReg(.rax, null); - const rax_lock = self.register_manager.lockRegAssumeUnused(.rax); - defer self.register_manager.unlockReg(rax_lock); + const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (ssbase_lock) |reg| self.register_manager.unlockReg(reg); + + const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); + + const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); + const addr_reg = regs[0]; + const index_reg = regs[1].to64(); - const addr_reg = try self.register_manager.allocReg(null, gp); switch (dst_ptr) { .memory, .got_load, @@ -6173,17 +6151,15 @@ fn genInlineMemset( return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); }, } - const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_reg_lock); - try self.genSetReg(Type.usize, .rax, len); - try self.genBinOpMir(.sub, Type.usize, .{ .register = .rax }, .{ .immediate = 1 }); + try self.genSetReg(Type.usize, index_reg, len); + try self.genBinOpMir(.sub, Type.usize, .{ .register = index_reg }, .{ .immediate = 1 }); // loop: - // cmp rax, -1 + // cmp index_reg, -1 const loop_start = try self.addInst(.{ .tag = .cmp, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = @bitCast(u32, @as(i32, -1)) }, }); @@ -6202,24 +6178,20 @@ fn genInlineMemset( if (x > math.maxInt(i32)) { return self.fail("TODO inline memset for value immediate larger than 32bits", .{}); } - // mov byte ptr [rbp + rax + stack_offset], imm - const payload = try self.addExtra(Mir.ImmPair{ - .dest_off = 0, - .operand = @truncate(u32, x), - }); + // mov byte ptr [rbp + index_reg + stack_offset], imm _ = try self.addInst(.{ .tag = .mov_mem_index_imm, .ops = Mir.Inst.Ops.encode(.{ .reg1 = addr_reg }), - .data = .{ .payload = payload }, + .data = .{ .payload = try self.addExtra(Mir.IndexRegisterDispImm.encode(index_reg, 0, @truncate(u32, x))) }, }); }, else => return self.fail("TODO inline memset for value of type {}", .{value}), } - // sub rax, 1 + // sub index_reg, 1 _ = try self.addInst(.{ .tag = .sub, - .ops = Mir.Inst.Ops.encode(.{ .reg1 = .rax }), + .ops = Mir.Inst.Ops.encode(.{ .reg1 = index_reg }), .data = .{ .imm = 1 }, }); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 66e603aab0..61ae772794 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -615,14 +615,15 @@ inline fn immOpSize(u_imm: u32) u6 { fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; - const imm = emit.mir.instructions.items(.data)[inst].imm; - // OP reg1, [reg2 + scale*rcx + imm32] + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, 
payload).data.decode(); + // OP reg1, [reg2 + scale*index + imm32] const scale_index = ScaleIndex{ .scale = scale, - .index = .rcx, + .index = index_reg_disp.index, }; return lowerToRmEnc(tag, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = ops.reg2, .scale_index = scale_index, }), emit.code); @@ -631,22 +632,16 @@ fn mirArithScaleSrc(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void fn mirArithScaleDst(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; - const imm = emit.mir.instructions.items(.data)[inst].imm; + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); const scale_index = ScaleIndex{ .scale = scale, - .index = .rax, + .index = index_reg_disp.index, }; - if (ops.reg2 == .none) { - // OP qword ptr [reg1 + scale*rax + 0], imm32 - return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{ - .disp = 0, - .base = ops.reg1, - .scale_index = scale_index, - }), imm, emit.code); - } - // OP [reg1 + scale*rax + imm32], reg2 + assert(ops.reg2 != .none); + // OP [reg1 + scale*index + imm32], reg2 return lowerToMrEnc(tag, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg2.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = ops.reg1, .scale_index = scale_index, }), ops.reg2, emit.code); @@ -656,24 +651,24 @@ fn mirArithScaleImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void const ops = emit.mir.instructions.items(.ops)[inst].decode(); const scale = ops.flags; const payload = emit.mir.instructions.items(.data)[inst].payload; - const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode(); const scale_index = ScaleIndex{ .scale = scale, - .index = .rax, + .index = index_reg_disp_imm.index, }; - // OP qword ptr [reg1 + scale*rax + imm32], imm32 + // OP qword ptr [reg1 + scale*index + imm32], imm32 return lowerToMiEnc(tag, RegisterOrMemory.mem(.qword_ptr, .{ - .disp = imm_pair.dest_off, + .disp = index_reg_disp_imm.disp, .base = ops.reg1, .scale_index = scale_index, - }), imm_pair.operand, emit.code); + }), index_reg_disp_imm.imm, emit.code); } fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { const ops = emit.mir.instructions.items(.ops)[inst].decode(); assert(ops.reg2 == .none); const payload = emit.mir.instructions.items(.data)[inst].payload; - const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + const index_reg_disp_imm = emit.mir.extraData(Mir.IndexRegisterDispImm, payload).data.decode(); const ptr_size: Memory.PtrSize = switch (ops.flags) { 0b00 => .byte_ptr, 0b01 => .word_ptr, @@ -682,14 +677,14 @@ fn mirArithMemIndexImm(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!v }; const scale_index = ScaleIndex{ .scale = 0, - .index = .rax, + .index = index_reg_disp_imm.index, }; - // OP ptr [reg1 + rax*1 + imm32], imm32 + // OP ptr [reg1 + index + imm32], imm32 return lowerToMiEnc(tag, RegisterOrMemory.mem(ptr_size, .{ - .disp = imm_pair.dest_off, + .disp = index_reg_disp_imm.disp, .base = ops.reg1, .scale_index = scale_index, - }), imm_pair.operand, emit.code); + }), index_reg_disp_imm.imm, emit.code); } fn mirMovSignExtend(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { @@ -957,18 +952,19 @@ fn mirLea(emit: *Emit, inst: 
Mir.Inst.Index) InnerError!void { mem.writeIntLittle(i32, emit.code.items[end_offset - 4 ..][0..4], disp); }, 0b10 => { - // lea reg, [rbp + rcx + imm32] - const imm = emit.mir.instructions.items(.data)[inst].imm; + // lea reg, [rbp + index + imm32] + const payload = emit.mir.instructions.items(.data)[inst].payload; + const index_reg_disp = emit.mir.extraData(Mir.IndexRegisterDisp, payload).data.decode(); const src_reg: ?Register = if (ops.reg2 != .none) ops.reg2 else null; const scale_index = ScaleIndex{ .scale = 0, - .index = .rcx, + .index = index_reg_disp.index, }; return lowerToRmEnc( .lea, ops.reg1, RegisterOrMemory.mem(Memory.PtrSize.new(ops.reg1.size()), .{ - .disp = imm, + .disp = index_reg_disp.disp, .base = src_reg, .scale_index = scale_index, }), @@ -2255,6 +2251,7 @@ fn lowerToMxEnc(tag: Tag, reg_or_mem: RegisterOrMemory, enc: Encoding, code: *st encoder.rex(.{ .w = wide, .b = base.isExtended(), + .x = if (mem_op.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2360,10 +2357,12 @@ fn lowerToMiXEnc( encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr, .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr, + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2415,11 +2414,13 @@ fn lowerToRmEnc( .w = setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = setRexWRegister(reg), .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2460,11 +2461,13 @@ fn lowerToMrEnc( .w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = dst_mem.ptr_size == .qword_ptr or setRexWRegister(reg), .r = reg.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2504,11 +2507,13 @@ fn lowerToRmiEnc( .w = setRexWRegister(reg), .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { encoder.rex(.{ .w = setRexWRegister(reg), .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } opc.encode(encoder); @@ -2545,10 +2550,12 @@ fn lowerToVmEnc( vex.rex(.{ .r = reg.isExtended(), .b = base.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } else { vex.rex(.{ .r = reg.isExtended(), + .x = if (src_mem.scale_index) |si| si.index.isExtended() else false, }); } encoder.vex(enc.prefix); @@ -2585,10 +2592,12 @@ fn lowerToMvEnc( vex.rex(.{ .r = reg.isExtended(), .b = base.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } else { vex.rex(.{ .r = reg.isExtended(), + .x = if (dst_mem.scale_index) |si| si.index.isExtended() else false, }); } encoder.vex(enc.prefix); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index ca19847042..182f3267a6 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -44,25 +44,28 @@ pub const Inst = struct { /// 0b01 word ptr [reg1 + imm32], imm16 /// 0b10 dword ptr [reg1 + imm32], imm32 /// 0b11 qword ptr [reg1 + imm32], imm32 (sign-extended to imm64) + /// Notes: + /// * Uses `ImmPair` as 
payload adc_mem_imm, - /// form: reg1, [reg2 + scale*rcx + imm32] + /// form: reg1, [reg2 + scale*index + imm32] /// ops flags scale /// 0b00 1 /// 0b01 2 /// 0b10 4 /// 0b11 8 + /// Notes: + /// * Uses `IndexRegisterDisp` as payload adc_scale_src, - /// form: [reg1 + scale*rax + imm32], reg2 - /// form: [reg1 + scale*rax + 0], imm32 + /// form: [reg1 + scale*index + imm32], reg2 /// ops flags scale /// 0b00 1 /// 0b01 2 /// 0b10 4 /// 0b11 8 /// Notes: - /// * If reg2 is `none` then it means Data field `imm` is used as the immediate. + /// * Uses `IndexRegisterDisp` payload. adc_scale_dst, /// form: [reg1 + scale*rax + imm32], imm32 @@ -72,14 +75,16 @@ pub const Inst = struct { /// 0b10 4 /// 0b11 8 /// Notes: - /// * Data field `payload` points at `ImmPair`. + /// * Uses `IndexRegisterDispImm` payload. adc_scale_imm, /// ops flags: form: - /// 0b00 byte ptr [reg1 + rax + imm32], imm8 - /// 0b01 word ptr [reg1 + rax + imm32], imm16 - /// 0b10 dword ptr [reg1 + rax + imm32], imm32 - /// 0b11 qword ptr [reg1 + rax + imm32], imm32 (sign-extended to imm64) + /// 0b00 byte ptr [reg1 + index + imm32], imm8 + /// 0b01 word ptr [reg1 + index + imm32], imm16 + /// 0b10 dword ptr [reg1 + index + imm32], imm32 + /// 0b11 qword ptr [reg1 + index + imm32], imm32 (sign-extended to imm64) + /// Notes: + /// * Uses `IndexRegisterDispImm` payload. adc_mem_index_imm, // The following instructions all have the same encoding as `adc`. @@ -174,7 +179,9 @@ pub const Inst = struct { /// 0b00 reg1, [reg2 + imm32] /// 0b00 reg1, [ds:imm32] /// 0b01 reg1, [rip + imm32] - /// 0b10 reg1, [reg2 + rcx + imm32] + /// 0b10 reg1, [reg2 + index + imm32] + /// Notes: + /// * 0b10 uses `IndexRegisterDisp` payload lea, /// ops flags: form: @@ -461,6 +468,66 @@ pub const Inst = struct { } }; +pub const IndexRegisterDisp = struct { + /// Index register to use with SIB-based encoding + index: u32, + + /// Displacement value + disp: u32, + + pub fn encode(index: Register, disp: u32) IndexRegisterDisp { + return .{ + .index = @enumToInt(index), + .disp = disp, + }; + } + + pub fn decode(this: IndexRegisterDisp) struct { + index: Register, + disp: u32, + } { + return .{ + .index = @intToEnum(Register, this.index), + .disp = this.disp, + }; + } +}; + +/// TODO: would it be worth making `IndexRegisterDisp` and `IndexRegisterDispImm` a variable length list +/// instead of having two structs, one a superset of the other one? +pub const IndexRegisterDispImm = struct { + /// Index register to use with SIB-based encoding + index: u32, + + /// Displacement value + disp: u32, + + /// Immediate + imm: u32, + + pub fn encode(index: Register, disp: u32, imm: u32) IndexRegisterDispImm { + return .{ + .index = @enumToInt(index), + .disp = disp, + .imm = imm, + }; + } + + pub fn decode(this: IndexRegisterDispImm) struct { + index: Register, + disp: u32, + imm: u32, + } { + return .{ + .index = @intToEnum(Register, this.index), + .disp = this.disp, + .imm = this.imm, + }; + } +}; + +/// Used in conjunction with `SaveRegisterList` payload to transfer a list of used registers +/// in a compact manner. 
pub const RegisterList = struct { bitset: BitSet = BitSet.initEmpty(), -- cgit v1.2.3 From 1e2a2d6fad8d83329de1b65338d741c1c6cd2e7d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Sep 2022 22:53:56 +0200 Subject: coff: fix bug in lowerUnnamedConst --- src/link/Coff.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index b5670ce5a1..17ec6e5f9c 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -485,7 +485,7 @@ fn populateMissingMetadata(self: *Coff) !void { try self.locals.append(gpa, .{ .name = [_]u8{0} ** 8, .value = 0, - .section_number = @intToEnum(coff.SectionNumber, 0), + .section_number = .UNDEFINED, .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, .storage_class = .NULL, .number_of_aux_symbols = 0, @@ -988,7 +988,7 @@ pub fn lowerUnnamedConst(self: *Coff, tv: TypedValue, decl_index: Module.Decl.In }; defer gpa.free(sym_name); try self.setSymbolName(sym, sym_name); - sym.section_number = @intToEnum(coff.SectionNumber, self.rdata_section_index.?); + sym.section_number = @intToEnum(coff.SectionNumber, self.rdata_section_index.? + 1); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); @@ -1334,7 +1334,7 @@ pub fn deleteExport(self: *Coff, exp: Export) void { sym.* = .{ .name = [_]u8{0} ** 8, .value = 0, - .section_number = @intToEnum(coff.SectionNumber, 0), + .section_number = .UNDEFINED, .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, .storage_class = .NULL, .number_of_aux_symbols = 0, -- cgit v1.2.3 From 66bad3eaaf82b21363c2212eeb1eaa4c171f2625 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 4 Sep 2022 10:22:21 +0200 Subject: coff: mark relocations dirty when target atoms change --- src/arch/x86_64/Emit.zig | 2 -- src/link/Coff.zig | 56 ++++++++++++++++++++++++++---------------------- 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 61ae772794..45e58be972 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -1029,7 +1029,6 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .addend = 0, .pcrel = true, .length = 2, - .prev_vaddr = atom.getSymbol(coff_file).value, }); } else { return emit.fail("TODO implement lea reg, [rip + reloc] for linking backends different than MachO", .{}); @@ -1165,7 +1164,6 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { .addend = 0, .pcrel = true, .length = 2, - .prev_vaddr = atom.getSymbol(coff_file).value, }); } else { return emit.fail("TODO implement call_extern for linking backends different than MachO", .{}); diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 17ec6e5f9c..74d6ce372e 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -114,11 +114,6 @@ relocs: RelocTable = .{}, /// this will be a table indexed by index into the list of Atoms. base_relocs: BaseRelocationTable = .{}, -/// A table of bindings indexed by the owning them `Atom`. -/// Note that once we refactor `Atom`'s lifetime and ownership rules, -/// this will be a table indexed by index into the list of Atoms. 
-bindings: BindingTable = .{}, - pub const Reloc = struct { @"type": enum { got, @@ -130,12 +125,11 @@ pub const Reloc = struct { addend: u32, pcrel: bool, length: u2, - prev_vaddr: u32, + dirty: bool = true, }; const RelocTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Reloc)); const BaseRelocationTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(u32)); -const BindingTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(SymbolWithLoc)); const UnnamedConstTable = std.AutoHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(*Atom)); const default_file_alignment: u16 = 0x200; @@ -192,6 +186,16 @@ pub const SymbolWithLoc = struct { // null means it's a synthetic global or Zig source. file: ?u32 = null, + + pub fn eql(this: SymbolWithLoc, other: SymbolWithLoc) bool { + if (this.file == null and other.file == null) { + return this.sym_index == other.sym_index; + } + if (this.file != null and other.file != null) { + return this.sym_index == other.sym_index and this.file.? == other.file.?; + } + return false; + } }; /// When allocating, the ideal_capacity is calculated by @@ -314,14 +318,6 @@ pub fn deinit(self: *Coff) void { } self.base_relocs.deinit(gpa); } - - { - var it = self.bindings.valueIterator(); - while (it.next()) |bindings| { - bindings.deinit(gpa); - } - self.bindings.deinit(gpa); - } } fn populateMissingMetadata(self: *Coff) !void { @@ -720,7 +716,6 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { .addend = 0, .pcrel = false, .length = 3, - .prev_vaddr = sym.value, }); const target_sym = self.getSymbol(target); @@ -753,10 +748,6 @@ fn createImportAtom(self: *Coff, target: SymbolWithLoc) !*Atom { log.debug("allocated import atom at 0x{x}", .{sym.value}); - const target_sym = self.getSymbol(target); - assert(target_sym.section_number == .UNDEFINED); - try atom.addBinding(self, target); - return atom; } @@ -798,6 +789,17 @@ fn writePtrWidthAtom(self: *Coff, atom: *Atom) !void { } } +fn markRelocsDirty(self: *Coff, target: SymbolWithLoc) void { + // TODO: reverse-lookup might come in handy here + var it = self.relocs.valueIterator(); + while (it.next()) |relocs| { + for (relocs.items) |*reloc| { + if (!reloc.target.eql(target)) continue; + reloc.dirty = true; + } + } +} + fn resolveRelocs(self: *Coff, atom: *Atom) !void { const relocs = self.relocs.get(atom) orelse return; const source_sym = atom.getSymbol(self); @@ -807,6 +809,8 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { log.debug("relocating '{s}'", .{atom.getName(self)}); for (relocs.items) |*reloc| { + if (!reloc.dirty) continue; + const target_vaddr = switch (reloc.@"type") { .got => blk: { const got_atom = self.getGotAtomForSymbol(reloc.target) orelse continue; @@ -821,7 +825,6 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { }, }; const target_vaddr_with_addend = target_vaddr + reloc.addend; - if (target_vaddr_with_addend == reloc.prev_vaddr) continue; log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ source_sym.value + reloc.offset, @@ -830,6 +833,8 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { @tagName(reloc.@"type"), }); + reloc.dirty = false; + if (reloc.pcrel) { const source_vaddr = source_sym.value + reloc.offset; const disp = target_vaddr_with_addend - source_vaddr - 4; @@ -854,8 +859,6 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { else => unreachable, }, } - - reloc.prev_vaddr = target_vaddr_with_addend; } } @@ -1131,7 +1134,9 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, if (vaddr != sym.value) 
{ sym.value = vaddr; log.debug(" (updating GOT entry)", .{}); - const got_atom = self.getGotAtomForSymbol(.{ .sym_index = atom.sym_index, .file = null }).?; + const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; + const got_atom = self.getGotAtomForSymbol(got_target).?; + self.markRelocsDirty(got_target); try self.writePtrWidthAtom(got_atom); } } else if (code_len < atom.size) { @@ -1156,6 +1161,7 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, try self.writePtrWidthAtom(got_atom); } + self.markRelocsDirty(atom.getSymbolWithLoc()); try self.writeAtom(atom, code); } @@ -1457,7 +1463,6 @@ pub fn getDeclVAddr( const atom = self.atom_by_index_table.get(reloc_info.parent_atom_index).?; const target = SymbolWithLoc{ .sym_index = decl.link.coff.sym_index, .file = null }; - const target_sym = self.getSymbol(target); try atom.addRelocation(self, .{ .@"type" = .direct, .target = target, @@ -1465,7 +1470,6 @@ pub fn getDeclVAddr( .addend = reloc_info.addend, .pcrel = false, .length = 3, - .prev_vaddr = target_sym.value, }); try atom.addBaseRelocation(self, @intCast(u32, reloc_info.offset)); -- cgit v1.2.3 From 467d69c68aac0459b6a0c2876083d7295e43134d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 4 Sep 2022 21:27:26 +0200 Subject: x86_64: fix SystemV calling convention --- src/arch/x86_64/CodeGen.zig | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index b03acfb848..25e8695e82 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -7216,19 +7216,23 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { for (param_types) |ty, i| { assert(ty.hasRuntimeBits()); - if (self.target.os.tag != .windows) { - return self.fail("TODO SysV calling convention", .{}); + const classes: []const abi.Class = switch (self.target.os.tag) { + .windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)}, + else => mem.sliceTo(&abi.classifySystemV(ty, self.target.*), .none), + }; + if (classes.len > 1) { + return self.fail("TODO handle multiple classes per type", .{}); } - - switch (abi.classifyWindows(ty, self.target.*)) { + switch (classes[0]) { .integer => blk: { if (i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; // fallthrough result.args[i] = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[i] }; continue; }, - .sse => return self.fail("TODO float/vector via SSE on Windows", .{}), .memory => {}, // fallthrough - else => unreachable, + else => |class| return self.fail("TODO handle calling convention class {s}", .{ + @tagName(class), + }), } const param_size = @intCast(u32, ty.abiSize(self.target.*)); @@ -7237,7 +7241,8 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; next_stack_offset = offset; } - // Align the stack to 16bytes before allocating shadow stack space. + + // Align the stack to 16bytes before allocating shadow stack space (if any). 
const aligned_next_stack_offset = mem.alignForwardGeneric(u32, next_stack_offset, 16); const padding = aligned_next_stack_offset - next_stack_offset; if (padding > 0) { @@ -7247,11 +7252,13 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { } } - // TODO fix this so that the 16byte alignment padding is at the current value of $rsp, and push - // the args onto the stack so that there is no padding between the first argument and - // the standard preamble. - // alignment padding | ret value (if > 8) | args ... | shadow stack space | $rbp | - result.stack_byte_count = aligned_next_stack_offset + 4 * @sizeOf(u64); + const shadow_stack_space: u32 = switch (self.target.os.tag) { + .windows => @intCast(u32, 4 * @sizeOf(u64)), + else => 0, + }; + + // alignment padding | args ... | shadow stack space (if any) | ret addr | $rbp | + result.stack_byte_count = aligned_next_stack_offset + shadow_stack_space; result.stack_align = 16; }, .Unspecified => { -- cgit v1.2.3 From f1bdf3f62f05388b75d6816b65c9cc5caec71cd9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 5 Sep 2022 09:25:47 +0200 Subject: coff: fix writing strtab to PE image file --- src/link/Coff.zig | 50 +++++++++++++++++++++++++++++++++++--------------- src/link/strtab.zig | 4 ++++ 2 files changed, 39 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 74d6ce372e..a85f8c3396 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -324,6 +324,19 @@ fn populateMissingMetadata(self: *Coff) !void { assert(self.llvm_object == null); const gpa = self.base.allocator; + try self.strtab.buffer.ensureUnusedCapacity(gpa, @sizeOf(u32)); + self.strtab.buffer.appendNTimesAssumeCapacity(0, @sizeOf(u32)); + + // Index 0 is always a null symbol. + try self.locals.append(gpa, .{ + .name = [_]u8{0} ** 8, + .value = 0, + .section_number = .UNDEFINED, + .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, + .storage_class = .NULL, + .number_of_aux_symbols = 0, + }); + if (self.text_section_index == null) { self.text_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.program_code_size_hint); @@ -472,21 +485,11 @@ fn populateMissingMetadata(self: *Coff) !void { } if (self.strtab_offset == null) { - try self.strtab.buffer.append(gpa, 0); - self.strtab_offset = self.findFreeSpace(@intCast(u32, self.strtab.len()), 1); - log.debug("found strtab free space 0x{x} to 0x{x}", .{ self.strtab_offset.?, self.strtab_offset.? + self.strtab.len() }); + const file_size = @intCast(u32, self.strtab.len()); + self.strtab_offset = self.findFreeSpace(file_size, @alignOf(u32)); // 4bytes aligned seems like a good idea here + log.debug("found strtab free space 0x{x} to 0x{x}", .{ self.strtab_offset.?, self.strtab_offset.? + file_size }); } - // Index 0 is always a null symbol. - try self.locals.append(gpa, .{ - .name = [_]u8{0} ** 8, - .value = 0, - .section_number = .UNDEFINED, - .@"type" = .{ .base_type = .NULL, .complex_type = .NULL }, - .storage_class = .NULL, - .number_of_aux_symbols = 0, - }); - { // We need to find out what the max file offset is according to section headers. 
// Otherwise, we may end up with an COFF binary with file size not matching the final section's @@ -1672,11 +1675,20 @@ fn writeStrtab(self: *Coff) !void { if (needed_size > allocated_size) { self.strtab_offset = null; - self.strtab_offset = @intCast(u32, self.findFreeSpace(needed_size, 1)); + self.strtab_offset = @intCast(u32, self.findFreeSpace(needed_size, @alignOf(u32))); } log.debug("writing strtab from 0x{x} to 0x{x}", .{ self.strtab_offset.?, self.strtab_offset.? + needed_size }); - try self.base.file.?.pwriteAll(self.strtab.buffer.items, self.strtab_offset.?); + + var buffer = std.ArrayList(u8).init(self.base.allocator); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(needed_size); + buffer.appendSliceAssumeCapacity(self.strtab.items()); + // Here, we do a trick in that we do not commit the size of the strtab to strtab buffer, instead + // we write the length of the strtab to a temporary buffer that goes to file. + mem.writeIntLittle(u32, buffer.items[0..4], @intCast(u32, self.strtab.len())); + + try self.base.file.?.pwriteAll(buffer.items, self.strtab_offset.?); } fn writeSectionHeaders(self: *Coff) !void { @@ -1984,6 +1996,14 @@ fn setSectionName(self: *Coff, header: *coff.SectionHeader, name: []const u8) !v mem.set(u8, header.name[name_offset.len..], 0); } +fn getSectionName(self: *const Coff, header: *const coff.SectionHeader) []const u8 { + if (header.getName()) |name| { + return name; + } + const offset = header.getNameOffset().?; + return self.strtab.get(offset).?; +} + fn setSymbolName(self: *Coff, symbol: *coff.Symbol, name: []const u8) !void { if (name.len <= 8) { mem.copy(u8, &symbol.name, name); diff --git a/src/link/strtab.zig b/src/link/strtab.zig index 8e314f189f..abb58defef 100644 --- a/src/link/strtab.zig +++ b/src/link/strtab.zig @@ -110,6 +110,10 @@ pub fn StringTable(comptime log_scope: @Type(.EnumLiteral)) type { return self.get(off) orelse unreachable; } + pub fn items(self: Self) []const u8 { + return self.buffer.items; + } + pub fn len(self: Self) usize { return self.buffer.items.len; } -- cgit v1.2.3 From 08f6546c8405dd7d9da80f857122f43f4d627a22 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 5 Sep 2022 10:34:16 +0200 Subject: coff: create a helper for allocating sections --- src/link/Coff.zig | 171 +++++++++++++++--------------------------------------- 1 file changed, 48 insertions(+), 123 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index a85f8c3396..23f0b7ea9d 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -338,150 +338,54 @@ fn populateMissingMetadata(self: *Coff) !void { }); if (self.text_section_index == null) { - self.text_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.program_code_size_hint); - const off = self.findFreeSpace(file_size, self.page_size); // TODO we are over-aligning in file; we should track both in file and in memory pointers - log.debug("found .text free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_CODE = 1, - .MEM_EXECUTE = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".text"); - try self.sections.append(gpa, .{ .header = header }); + self.text_section_index = try 
self.allocateSection(".text", file_size, .{ + .CNT_CODE = 1, + .MEM_EXECUTE = 1, + .MEM_READ = 1, + }); } if (self.got_section_index == null) { - self.got_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .got free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".got"); - try self.sections.append(gpa, .{ .header = header }); + self.got_section_index = try self.allocateSection(".got", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); } if (self.rdata_section_index == null) { - self.rdata_section_index = @intCast(u16, self.sections.slice().len); const file_size: u32 = 1024; - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .rdata free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".rdata"); - try self.sections.append(gpa, .{ .header = header }); + self.rdata_section_index = try self.allocateSection(".rdata", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); } if (self.data_section_index == null) { - self.data_section_index = @intCast(u16, self.sections.slice().len); const file_size: u32 = 1024; - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .data free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - .MEM_WRITE = 1, - }, - }; - try self.setSectionName(&header, ".data"); - try self.sections.append(gpa, .{ .header = header }); + self.data_section_index = try self.allocateSection(".data", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + .MEM_WRITE = 1, + }); } if (self.reloc_section_index == null) { - self.reloc_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.symbol_count_hint) * @sizeOf(coff.BaseRelocation); - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .reloc free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_DISCARDABLE 
= 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".reloc"); - try self.sections.append(gpa, .{ .header = header }); + self.reloc_section_index = try self.allocateSection(".reloc", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_DISCARDABLE = 1, + .MEM_READ = 1, + }); } if (self.idata_section_index == null) { - self.idata_section_index = @intCast(u16, self.sections.slice().len); const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); - const off = self.findFreeSpace(file_size, self.page_size); - log.debug("found .idata free space 0x{x} to 0x{x}", .{ off, off + file_size }); - var header = coff.SectionHeader{ - .name = undefined, - .virtual_size = file_size, - .virtual_address = off, - .size_of_raw_data = file_size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }, - }; - try self.setSectionName(&header, ".idata"); - try self.sections.append(gpa, .{ .header = header }); + self.idata_section_index = try self.allocateSection(".idata", file_size, .{ + .CNT_INITIALIZED_DATA = 1, + .MEM_READ = 1, + }); } if (self.strtab_offset == null) { @@ -505,6 +409,27 @@ fn populateMissingMetadata(self: *Coff) !void { } } +fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.SectionHeaderFlags) !u16 { + const index = @intCast(u16, self.sections.slice().len); + const off = self.findFreeSpace(size, self.page_size); // TODO: we overalign here + log.debug("found {s} free space 0x{x} to 0x{x}", .{ name, off, off + size }); + var header = coff.SectionHeader{ + .name = undefined, + .virtual_size = size, + .virtual_address = off, + .size_of_raw_data = size, + .pointer_to_raw_data = off, + .pointer_to_relocations = 0, + .pointer_to_linenumbers = 0, + .number_of_relocations = 0, + .number_of_linenumbers = 0, + .flags = flags, + }; + try self.setSectionName(&header, name); + try self.sections.append(self.base.allocator, .{ .header = header }); + return index; +} + pub fn allocateDeclIndexes(self: *Coff, decl_index: Module.Decl.Index) !void { if (self.llvm_object) |_| return; const decl = self.base.options.module.?.declPtr(decl_index); -- cgit v1.2.3 From 79e51c5e4b5f9701676079ea23e67cc355a8d42c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 5 Sep 2022 16:06:58 +0200 Subject: coff: differentiate between file space and VM space for alloc --- src/link/Coff.zig | 76 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 23f0b7ea9d..3ffa86d6f8 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -411,12 +411,19 @@ fn populateMissingMetadata(self: *Coff) !void { fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.SectionHeaderFlags) !u16 { const index = @intCast(u16, self.sections.slice().len); - const off = self.findFreeSpace(size, self.page_size); // TODO: we overalign here - log.debug("found {s} free space 0x{x} to 0x{x}", .{ name, off, off + size }); + const off = self.findFreeSpace(size, default_file_alignment); + const vaddr = self.findFreeSpaceVM(size, self.page_size); + log.debug("found {s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ + name, + off, + off + size, + vaddr, + vaddr + size, + }); var header = coff.SectionHeader{ .name = undefined, .virtual_size = size, - .virtual_address = off, + .virtual_address = 
vaddr, .size_of_raw_data = size, .pointer_to_raw_data = off, .pointer_to_relocations = 0, @@ -513,16 +520,22 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u const sect_capacity = self.allocatedSize(header.pointer_to_raw_data); const needed_size: u32 = (vaddr + new_atom_size) - header.virtual_address; if (needed_size > sect_capacity) { + // const new_offset = self.findFreeSpace(needed_size, self.page_size); + // const current_size = if (last_atom) |atom| blk: { + // const sym = last_atom.getSymbol(self); + // break :blk (sym.value + atom.size) - header.virtual_address; + // } else 0; + // log.debug("moving {s} from 0x{x} to 0x{x}", .{ header.pointer_to_raw_data, new_offset }); + // const amt = try self.base.file.?.copyRangeAll(header.pointer_to_raw_data, self.base.file.?, new_offset, current_size); + // if (amt != current_size) return error.InputOutput; + @panic("TODO move section"); + // header.virtual_size = needed_size; + // header.size_of_raw_data = mem.alignForwardGeneric(u32, needed_size, default_file_alignment); } maybe_last_atom.* = atom; - // header.virtual_size = needed_size; - // header.size_of_raw_data = mem.alignForwardGeneric(u32, needed_size, default_file_alignment); } - // if (header.getAlignment().? < alignment) { - // header.setAlignment(alignment); - // } atom.size = new_atom_size; atom.alignment = alignment; @@ -1778,14 +1791,15 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { } fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { - const headers_size = self.getSizeOfHeaders(); + const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); if (start < headers_size) return headers_size; - const end = start + size; + const end = start + padToIdeal(size); if (self.strtab_offset) |off| { - const increased_size = @intCast(u32, self.strtab.len()); + const tight_size = @intCast(u32, self.strtab.len()); + const increased_size = padToIdeal(tight_size); const test_end = off + increased_size; if (end > off and start < test_end) { return test_end; @@ -1793,7 +1807,8 @@ fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { } for (self.sections.items(.header)) |header| { - const increased_size = header.size_of_raw_data; + const tight_size = header.size_of_raw_data; + const increased_size = padToIdeal(tight_size); const test_end = header.pointer_to_raw_data + increased_size; if (end > header.pointer_to_raw_data and start < test_end) { return test_end; @@ -1803,7 +1818,7 @@ fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { return null; } -pub fn allocatedSize(self: *Coff, start: u32) u32 { +fn allocatedSize(self: *Coff, start: u32) u32 { if (start == 0) return 0; var min_pos: u32 = std.math.maxInt(u32); @@ -1817,7 +1832,7 @@ pub fn allocatedSize(self: *Coff, start: u32) u32 { return min_pos - start; } -pub fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { +fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { var start: u32 = 0; while (self.detectAllocCollision(start, object_size)) |item_end| { start = mem.alignForwardGeneric(u32, item_end, min_alignment); @@ -1825,6 +1840,39 @@ pub fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { return start; } +fn detectAllocCollisionVM(self: *Coff, start: u32, size: u32) ?u32 { + const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); + if (start < headers_size) + return headers_size; + + const end = start + size; + + if (self.strtab_offset) |off| { + const increased_size = 
@intCast(u32, self.strtab.len()); + const test_end = off + increased_size; + if (end > off and start < test_end) { + return test_end; + } + } + + for (self.sections.items(.header)) |header| { + const increased_size = header.virtual_size; + const test_end = header.virtual_address + increased_size; + if (end > header.virtual_address and start < test_end) { + return test_end; + } + } + return null; +} + +fn findFreeSpaceVM(self: *Coff, object_size: u32, min_alignment: u32) u32 { + var start: u32 = 0; + while (self.detectAllocCollisionVM(start, object_size)) |item_end| { + start = mem.alignForwardGeneric(u32, item_end, min_alignment); + } + return start; +} + inline fn getSizeOfHeaders(self: Coff) u32 { const msdos_hdr_size = msdos_stub.len + 4; return @intCast(u32, msdos_hdr_size + @sizeOf(coff.CoffHeader) + self.getOptionalHeaderSize() + -- cgit v1.2.3 From 9116e0f7464c65912d758ebd58aad35d9b07cabc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 09:59:42 +0200 Subject: coff: find new file space for a section (file offsets) --- src/link/Coff.zig | 81 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 3ffa86d6f8..d31344a23e 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -520,19 +520,36 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u const sect_capacity = self.allocatedSize(header.pointer_to_raw_data); const needed_size: u32 = (vaddr + new_atom_size) - header.virtual_address; if (needed_size > sect_capacity) { - // const new_offset = self.findFreeSpace(needed_size, self.page_size); - // const current_size = if (last_atom) |atom| blk: { - // const sym = last_atom.getSymbol(self); - // break :blk (sym.value + atom.size) - header.virtual_address; - // } else 0; - // log.debug("moving {s} from 0x{x} to 0x{x}", .{ header.pointer_to_raw_data, new_offset }); - // const amt = try self.base.file.?.copyRangeAll(header.pointer_to_raw_data, self.base.file.?, new_offset, current_size); - // if (amt != current_size) return error.InputOutput; - - @panic("TODO move section"); - // header.virtual_size = needed_size; - // header.size_of_raw_data = mem.alignForwardGeneric(u32, needed_size, default_file_alignment); + const new_offset = self.findFreeSpace(needed_size, default_file_alignment); + const current_size = if (maybe_last_atom.*) |last_atom| blk: { + const sym = last_atom.getSymbol(self); + break :blk (sym.value + last_atom.size) - header.virtual_address; + } else 0; + log.debug("moving {s} from (0x{x} - 0x{x}) to (0x{x} - 0x{x})", .{ + self.getSectionName(header), + header.pointer_to_raw_data, + header.pointer_to_raw_data + current_size, + new_offset, + new_offset + current_size, + }); + const amt = try self.base.file.?.copyRangeAll( + header.pointer_to_raw_data, + self.base.file.?, + new_offset, + current_size, + ); + if (amt != current_size) return error.InputOutput; + header.pointer_to_raw_data = new_offset; + } + + const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); + if (needed_size > sect_vm_capacity) { + log.err("needed {x}, available {x}", .{ needed_size, sect_vm_capacity }); + @panic("TODO expand section in virtual address space"); } + + header.virtual_size = @maximum(header.virtual_size, needed_size); + header.size_of_raw_data = needed_size; maybe_last_atom.* = atom; } @@ -778,8 +795,9 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { if (reloc.pcrel) { const source_vaddr = 
source_sym.value + reloc.offset; - const disp = target_vaddr_with_addend - source_vaddr - 4; - try self.base.file.?.pwriteAll(mem.asBytes(&@intCast(u32, disp)), file_offset + reloc.offset); + const disp = + @intCast(i32, target_vaddr_with_addend) - @intCast(i32, source_vaddr) - 4; + try self.base.file.?.pwriteAll(mem.asBytes(&disp), file_offset + reloc.offset); continue; } @@ -1515,7 +1533,24 @@ fn writeBaseRelocations(self: *Coff) !void { const header = &self.sections.items(.header)[self.reloc_section_index.?]; const sect_capacity = self.allocatedSize(header.pointer_to_raw_data); const needed_size = @intCast(u32, buffer.items.len); - assert(needed_size < sect_capacity); // TODO expand .reloc section + if (needed_size > sect_capacity) { + const new_offset = self.findFreeSpace(needed_size, default_file_alignment); + log.debug("writing {s} at 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ + self.getSectionName(header), + header.pointer_to_raw_data, + header.pointer_to_raw_data + needed_size, + new_offset, + new_offset + needed_size, + }); + header.pointer_to_raw_data = new_offset; + + const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); + if (needed_size > sect_vm_capacity) { + @panic("TODO expand section in virtual address space"); + } + } + header.virtual_size = @maximum(header.virtual_size, needed_size); + header.size_of_raw_data = needed_size; try self.base.file.?.pwriteAll(buffer.items, header.pointer_to_raw_data); @@ -1608,6 +1643,8 @@ fn writeImportTable(self: *Coff) !void { } fn writeStrtab(self: *Coff) !void { + if (self.strtab_offset == null) return; + const allocated_size = self.allocatedSize(self.strtab_offset.?); const needed_size = @intCast(u32, self.strtab.len()); @@ -1840,6 +1877,20 @@ fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { return start; } +fn allocatedSizeVM(self: *Coff, start: u32) u32 { + if (start == 0) + return 0; + var min_pos: u32 = std.math.maxInt(u32); + if (self.strtab_offset) |off| { + if (off > start and off < min_pos) min_pos = off; + } + for (self.sections.items(.header)) |header| { + if (header.virtual_address <= start) continue; + if (header.virtual_address < min_pos) min_pos = header.virtual_address; + } + return min_pos - start; +} + fn detectAllocCollisionVM(self: *Coff, start: u32, size: u32) ?u32 { const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); if (start < headers_size) -- cgit v1.2.3 From 2b373b05793d617f308c7f99b35be7ad1ca0321f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 14:12:01 +0200 Subject: coff: grow section in virtual address space when required --- src/link/Coff.zig | 120 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 82 insertions(+), 38 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index d31344a23e..433355b54d 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -126,6 +126,15 @@ pub const Reloc = struct { pcrel: bool, length: u2, dirty: bool = true, + + /// Returns an Atom which is the target node of this relocation edge (if any). 
+ fn getTargetAtom(self: Reloc, coff_file: *Coff) ?*Atom { + switch (self.@"type") { + .got => return coff_file.getGotAtomForSymbol(self.target), + .direct => return coff_file.getAtomForSymbol(self.target), + .imports => return coff_file.getImportAtomForSymbol(self.target), + } + } }; const RelocTable = std.AutoHashMapUnmanaged(*Atom, std.ArrayListUnmanaged(Reloc)); @@ -355,7 +364,7 @@ fn populateMissingMetadata(self: *Coff) !void { } if (self.rdata_section_index == null) { - const file_size: u32 = 1024; + const file_size: u32 = self.page_size; self.rdata_section_index = try self.allocateSection(".rdata", file_size, .{ .CNT_INITIALIZED_DATA = 1, .MEM_READ = 1, @@ -363,7 +372,7 @@ fn populateMissingMetadata(self: *Coff) !void { } if (self.data_section_index == null) { - const file_size: u32 = 1024; + const file_size: u32 = self.page_size; self.data_section_index = try self.allocateSection(".data", file_size, .{ .CNT_INITIALIZED_DATA = 1, .MEM_READ = 1, @@ -371,19 +380,19 @@ fn populateMissingMetadata(self: *Coff) !void { }); } - if (self.reloc_section_index == null) { - const file_size = @intCast(u32, self.base.options.symbol_count_hint) * @sizeOf(coff.BaseRelocation); - self.reloc_section_index = try self.allocateSection(".reloc", file_size, .{ + if (self.idata_section_index == null) { + const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); + self.idata_section_index = try self.allocateSection(".idata", file_size, .{ .CNT_INITIALIZED_DATA = 1, - .MEM_DISCARDABLE = 1, .MEM_READ = 1, }); } - if (self.idata_section_index == null) { - const file_size = @intCast(u32, self.base.options.symbol_count_hint) * self.ptr_width.abiSize(); - self.idata_section_index = try self.allocateSection(".idata", file_size, .{ + if (self.reloc_section_index == null) { + const file_size = @intCast(u32, self.base.options.symbol_count_hint) * @sizeOf(coff.BaseRelocation); + self.reloc_section_index = try self.allocateSection(".reloc", file_size, .{ .CNT_INITIALIZED_DATA = 1, + .MEM_DISCARDABLE = 1, .MEM_READ = 1, }); } @@ -437,6 +446,35 @@ fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.Section return index; } +fn growSectionVM(self: *Coff, sect_id: u32, needed_size: u32) !void { + const header = &self.sections.items(.header)[sect_id]; + const increased_size = padToIdeal(needed_size); + const old_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, header.virtual_size, self.page_size); + const new_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, increased_size, self.page_size); + const diff = new_aligned_end - old_aligned_end; + + // TODO: enforce order by increasing VM addresses in self.sections container. + // This is required by the loader anyhow as far as I can tell. 
+ for (self.sections.items(.header)[sect_id + 1 ..]) |*next_header, next_sect_id| { + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id + 1 + next_sect_id]; + next_header.virtual_address += diff; + + if (maybe_last_atom.*) |last_atom| { + var atom = last_atom; + while (true) { + const sym = atom.getSymbolPtr(self); + sym.value += diff; + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } + } + + header.virtual_size = increased_size; +} + pub fn allocateDeclIndexes(self: *Coff, decl_index: Module.Decl.Index) !void { if (self.llvm_object) |_| return; const decl = self.base.options.module.?.declPtr(decl_index); @@ -525,13 +563,7 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u const sym = last_atom.getSymbol(self); break :blk (sym.value + last_atom.size) - header.virtual_address; } else 0; - log.debug("moving {s} from (0x{x} - 0x{x}) to (0x{x} - 0x{x})", .{ - self.getSectionName(header), - header.pointer_to_raw_data, - header.pointer_to_raw_data + current_size, - new_offset, - new_offset + current_size, - }); + log.debug("moving {s} from 0x{x} to 0x{x}", .{ self.getSectionName(header), header.pointer_to_raw_data, new_offset }); const amt = try self.base.file.?.copyRangeAll( header.pointer_to_raw_data, self.base.file.?, @@ -544,8 +576,8 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); if (needed_size > sect_vm_capacity) { - log.err("needed {x}, available {x}", .{ needed_size, sect_vm_capacity }); - @panic("TODO expand section in virtual address space"); + try self.growSectionVM(sect_id, needed_size); + self.markRelocsDirtyByAddress(header.virtual_address + needed_size); } header.virtual_size = @maximum(header.virtual_size, needed_size); @@ -747,7 +779,7 @@ fn writePtrWidthAtom(self: *Coff, atom: *Atom) !void { } } -fn markRelocsDirty(self: *Coff, target: SymbolWithLoc) void { +fn markRelocsDirtyByTarget(self: *Coff, target: SymbolWithLoc) void { // TODO: reverse-lookup might come in handy here var it = self.relocs.valueIterator(); while (it.next()) |relocs| { @@ -758,6 +790,18 @@ fn markRelocsDirty(self: *Coff, target: SymbolWithLoc) void { } } +fn markRelocsDirtyByAddress(self: *Coff, addr: u32) void { + var it = self.relocs.valueIterator(); + while (it.next()) |relocs| { + for (relocs.items) |*reloc| { + const target_atom = reloc.getTargetAtom(self) orelse continue; + const target_sym = target_atom.getSymbol(self); + if (target_sym.value < addr) continue; + reloc.dirty = true; + } + } +} + fn resolveRelocs(self: *Coff, atom: *Atom) !void { const relocs = self.relocs.get(atom) orelse return; const source_sym = atom.getSymbol(self); @@ -769,19 +813,8 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { for (relocs.items) |*reloc| { if (!reloc.dirty) continue; - const target_vaddr = switch (reloc.@"type") { - .got => blk: { - const got_atom = self.getGotAtomForSymbol(reloc.target) orelse continue; - break :blk got_atom.getSymbol(self).value; - }, - .direct => blk: { - break :blk self.getSymbol(reloc.target).value; - }, - .imports => blk: { - const import_atom = self.getImportAtomForSymbol(reloc.target) orelse continue; - break :blk import_atom.getSymbol(self).value; - }, - }; + const target_atom = reloc.getTargetAtom(self) orelse continue; + const target_vaddr = target_atom.getSymbol(self).value; const target_vaddr_with_addend = target_vaddr + reloc.addend; log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ @@ -1095,7 +1128,7 @@ 
fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, log.debug(" (updating GOT entry)", .{}); const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; const got_atom = self.getGotAtomForSymbol(got_target).?; - self.markRelocsDirty(got_target); + self.markRelocsDirtyByTarget(got_target); try self.writePtrWidthAtom(got_atom); } } else if (code_len < atom.size) { @@ -1120,7 +1153,7 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, try self.writePtrWidthAtom(got_atom); } - self.markRelocsDirty(atom.getSymbolWithLoc()); + self.markRelocsDirtyByTarget(atom.getSymbolWithLoc()); try self.writeAtom(atom, code); } @@ -1546,7 +1579,8 @@ fn writeBaseRelocations(self: *Coff) !void { const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); if (needed_size > sect_vm_capacity) { - @panic("TODO expand section in virtual address space"); + // TODO: we want to enforce .reloc after every alloc section. + try self.growSectionVM(self.reloc_section_index.?, needed_size); } } header.virtual_size = @maximum(header.virtual_size, needed_size); @@ -1881,9 +1915,6 @@ fn allocatedSizeVM(self: *Coff, start: u32) u32 { if (start == 0) return 0; var min_pos: u32 = std.math.maxInt(u32); - if (self.strtab_offset) |off| { - if (off > start and off < min_pos) min_pos = off; - } for (self.sections.items(.header)) |header| { if (header.virtual_address <= start) continue; if (header.virtual_address < min_pos) min_pos = header.virtual_address; @@ -2116,3 +2147,16 @@ fn logSymtab(self: *Coff) void { } } } + +fn logSections(self: *Coff) void { + log.debug("sections:", .{}); + for (self.sections.items(.header)) |*header| { + log.debug(" {s}: VM({x}, {x}) FILE({x}, {x})", .{ + self.getSectionName(header), + header.virtual_address, + header.virtual_address + header.virtual_size, + header.pointer_to_raw_data, + header.pointer_to_raw_data + header.size_of_raw_data, + }); + } +} -- cgit v1.2.3 From 16ca47b9b81c67c88d678b6230dd02ce9dad7f07 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 15:13:06 +0200 Subject: coff: remove redundant bits and clean up --- src/link/Coff.zig | 60 +++++++++++++------------------------------------- src/link/Coff/Atom.zig | 7 ------ 2 files changed, 15 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 433355b54d..e594810ee7 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -30,7 +30,6 @@ const TypedValue = @import("../TypedValue.zig"); pub const base_tag: link.File.Tag = .coff; const msdos_stub = @embedFile("msdos-stub.bin"); -const N_DATA_DIRS: u5 = 16; /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. 
llvm_object: ?*LlvmObject = null, @@ -44,7 +43,7 @@ page_size: u32, objects: std.ArrayListUnmanaged(Object) = .{}, sections: std.MultiArrayList(Section) = .{}, -data_directories: [N_DATA_DIRS]coff.ImageDataDirectory, +data_directories: [coff.IMAGE_NUMBEROF_DIRECTORY_ENTRIES]coff.ImageDataDirectory, text_section_index: ?u16 = null, got_section_index: ?u16 = null, @@ -259,7 +258,7 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Coff { }, .ptr_width = ptr_width, .page_size = page_size, - .data_directories = comptime mem.zeroes([N_DATA_DIRS]coff.ImageDataDirectory), + .data_directories = comptime mem.zeroes([coff.IMAGE_NUMBEROF_DIRECTORY_ENTRIES]coff.ImageDataDirectory), }; const use_llvm = build_options.have_llvm and options.use_llvm; @@ -421,7 +420,12 @@ fn populateMissingMetadata(self: *Coff) !void { fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.SectionHeaderFlags) !u16 { const index = @intCast(u16, self.sections.slice().len); const off = self.findFreeSpace(size, default_file_alignment); - const vaddr = self.findFreeSpaceVM(size, self.page_size); + // Memory is always allocated in sequence + const vaddr = blk: { + if (index == 0) break :blk self.page_size; + const prev_header = self.sections.items(.header)[index - 1]; + break :blk mem.alignForwardGeneric(u32, prev_header.virtual_address + prev_header.virtual_size, self.page_size); + }; log.debug("found {s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ name, off, @@ -574,7 +578,7 @@ fn allocateAtom(self: *Coff, atom: *Atom, new_atom_size: u32, alignment: u32) !u header.pointer_to_raw_data = new_offset; } - const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); + const sect_vm_capacity = self.allocatedVirtualSize(header.virtual_address); if (needed_size > sect_vm_capacity) { try self.growSectionVM(sect_id, needed_size); self.markRelocsDirtyByAddress(header.virtual_address + needed_size); @@ -857,10 +861,9 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { fn freeAtom(self: *Coff, atom: *Atom) void { log.debug("freeAtom {*}", .{atom}); - // TODO hashmap - for (self.managed_atoms.items) |owned| { - if (owned == atom) break; - } else atom.deinit(self.base.allocator); + // Remove any relocs and base relocs associated with this Atom + _ = self.relocs.remove(atom); + _ = self.base_relocs.remove(atom); const sym = atom.getSymbol(self); const sect_id = @enumToInt(sym.section_number) - 1; @@ -1577,7 +1580,7 @@ fn writeBaseRelocations(self: *Coff) !void { }); header.pointer_to_raw_data = new_offset; - const sect_vm_capacity = self.allocatedSizeVM(header.virtual_address); + const sect_vm_capacity = self.allocatedVirtualSize(header.virtual_address); if (needed_size > sect_vm_capacity) { // TODO: we want to enforce .reloc after every alloc section. 
try self.growSectionVM(self.reloc_section_index.?, needed_size); @@ -1862,7 +1865,7 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { } fn detectAllocCollision(self: *Coff, start: u32, size: u32) ?u32 { - const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); + const headers_size = @maximum(self.getSizeOfHeaders(), self.page_size); if (start < headers_size) return headers_size; @@ -1911,7 +1914,7 @@ fn findFreeSpace(self: *Coff, object_size: u32, min_alignment: u32) u32 { return start; } -fn allocatedSizeVM(self: *Coff, start: u32) u32 { +fn allocatedVirtualSize(self: *Coff, start: u32) u32 { if (start == 0) return 0; var min_pos: u32 = std.math.maxInt(u32); @@ -1922,39 +1925,6 @@ fn allocatedSizeVM(self: *Coff, start: u32) u32 { return min_pos - start; } -fn detectAllocCollisionVM(self: *Coff, start: u32, size: u32) ?u32 { - const headers_size = @maximum(self.getSizeOfHeaders(), 0x1000); - if (start < headers_size) - return headers_size; - - const end = start + size; - - if (self.strtab_offset) |off| { - const increased_size = @intCast(u32, self.strtab.len()); - const test_end = off + increased_size; - if (end > off and start < test_end) { - return test_end; - } - } - - for (self.sections.items(.header)) |header| { - const increased_size = header.virtual_size; - const test_end = header.virtual_address + increased_size; - if (end > header.virtual_address and start < test_end) { - return test_end; - } - } - return null; -} - -fn findFreeSpaceVM(self: *Coff, object_size: u32, min_alignment: u32) u32 { - var start: u32 = 0; - while (self.detectAllocCollisionVM(start, object_size)) |item_end| { - start = mem.alignForwardGeneric(u32, item_end, min_alignment); - } - return start; -} - inline fn getSizeOfHeaders(self: Coff) u32 { const msdos_hdr_size = msdos_stub.len + 4; return @intCast(u32, msdos_hdr_size + @sizeOf(coff.CoffHeader) + self.getOptionalHeaderSize() + diff --git a/src/link/Coff/Atom.zig b/src/link/Coff/Atom.zig index 1d6e511f3b..ffd8fe45e6 100644 --- a/src/link/Coff/Atom.zig +++ b/src/link/Coff/Atom.zig @@ -4,8 +4,6 @@ const std = @import("std"); const coff = std.coff; const log = std.log.scoped(.link); -const Allocator = std.mem.Allocator; - const Coff = @import("../Coff.zig"); const Reloc = Coff.Reloc; const SymbolWithLoc = Coff.SymbolWithLoc; @@ -41,11 +39,6 @@ pub const empty = Atom{ .next = null, }; -pub fn deinit(self: *Atom, gpa: Allocator) void { - _ = self; - _ = gpa; -} - /// Returns symbol referencing this atom. pub fn getSymbol(self: Atom, coff_file: *const Coff) *const coff.Symbol { return coff_file.getSymbol(.{ -- cgit v1.2.3 From 7b8cc599d997759201a945d05b91c24f5cfe29d7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 16:56:26 +0200 Subject: coff: use more generous initial memory sizes for sections --- src/link/Coff.zig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index e594810ee7..cdb0f9a9cc 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -421,11 +421,14 @@ fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.Section const index = @intCast(u16, self.sections.slice().len); const off = self.findFreeSpace(size, default_file_alignment); // Memory is always allocated in sequence + // TODO: investigate if we can allocate .text last; this way it would never need to grow in memory! 
const vaddr = blk: { if (index == 0) break :blk self.page_size; const prev_header = self.sections.items(.header)[index - 1]; break :blk mem.alignForwardGeneric(u32, prev_header.virtual_address + prev_header.virtual_size, self.page_size); }; + // We commit more memory than needed upfront so that we don't have to reallocate too soon. + const memsz = mem.alignForwardGeneric(u32, size, self.page_size) * 100; log.debug("found {s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ name, off, @@ -435,7 +438,7 @@ fn allocateSection(self: *Coff, name: []const u8, size: u32, flags: coff.Section }); var header = coff.SectionHeader{ .name = undefined, - .virtual_size = size, + .virtual_size = memsz, .virtual_address = vaddr, .size_of_raw_data = size, .pointer_to_raw_data = off, @@ -456,6 +459,7 @@ fn growSectionVM(self: *Coff, sect_id: u32, needed_size: u32) !void { const old_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, header.virtual_size, self.page_size); const new_aligned_end = header.virtual_address + mem.alignForwardGeneric(u32, increased_size, self.page_size); const diff = new_aligned_end - old_aligned_end; + log.debug("growing {s} in virtual memory by {x}", .{ self.getSectionName(header), diff }); // TODO: enforce order by increasing VM addresses in self.sections container. // This is required by the loader anyhow as far as I can tell. -- cgit v1.2.3 From f3e4e44a2b8de8ee860c2c9d11ee1a770e625e0e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 20:16:39 +0200 Subject: coff: fix memory leak in incorrectly disposing of globals table --- src/link/Coff.zig | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index cdb0f9a9cc..cd529ddab0 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -292,7 +292,12 @@ pub fn deinit(self: *Coff) void { self.managed_atoms.deinit(gpa); self.locals.deinit(gpa); + + for (self.globals.keys()) |key| { + gpa.free(key); + } self.globals.deinit(gpa); + self.unresolved.deinit(gpa); self.locals_free_list.deinit(gpa); self.strtab.deinit(gpa); -- cgit v1.2.3 From 99c2cb72e850ffdfd83abcc941c84a0053f8494e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Sep 2022 22:34:32 +0200 Subject: coff: track globals in contiguous array to allow for tombstones --- src/link/Coff.zig | 117 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index cd529ddab0..2abfc78d1e 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -53,10 +53,12 @@ reloc_section_index: ?u16 = null, idata_section_index: ?u16 = null, locals: std.ArrayListUnmanaged(coff.Symbol) = .{}, -globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, +resolver: std.StringHashMapUnmanaged(u32) = .{}, unresolved: std.AutoArrayHashMapUnmanaged(u32, bool) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, +globals_free_list: std.ArrayListUnmanaged(u32) = .{}, strtab: StringTable(.strtab) = .{}, strtab_offset: ?u32 = null, @@ -292,11 +294,15 @@ pub fn deinit(self: *Coff) void { self.managed_atoms.deinit(gpa); self.locals.deinit(gpa); + self.globals.deinit(gpa); - for (self.globals.keys()) |key| { - gpa.free(key); + { + var it = self.resolver.keyIterator(); + while (it.next()) |key_ptr| { + gpa.free(key_ptr.*); + } + self.resolver.deinit(gpa); } - self.globals.deinit(gpa); self.unresolved.deinit(gpa); self.locals_free_list.deinit(gpa); 
@@ -651,6 +657,30 @@ fn allocateSymbol(self: *Coff) !u32 { return index; } +fn allocateGlobal(self: *Coff) !u32 { + const gpa = self.base.allocator; + try self.globals.ensureUnusedCapacity(gpa, 1); + + const index = blk: { + if (self.globals_free_list.popOrNull()) |index| { + log.debug(" (reusing global index {d})", .{index}); + break :blk index; + } else { + log.debug(" (allocating global index {d})", .{self.globals.items.len}); + const index = @intCast(u32, self.globals.items.len); + _ = self.globals.addOneAssumeCapacity(); + break :blk index; + } + }; + + self.globals.items[index] = .{ + .sym_index = 0, + .file = null, + }; + + return index; +} + pub fn allocateGotEntry(self: *Coff, target: SymbolWithLoc) !u32 { const gpa = self.base.allocator; try self.got_entries.ensureUnusedCapacity(gpa, 1); @@ -1340,7 +1370,7 @@ pub fn deleteExport(self: *Coff, exp: Export) void { const sym = self.getSymbolPtr(sym_loc); const sym_name = self.getSymbolName(sym_loc); log.debug("deleting export '{s}'", .{sym_name}); - assert(sym.storage_class == .EXTERNAL); + assert(sym.storage_class == .EXTERNAL and sym.section_number != .UNDEFINED); sym.* = .{ .name = [_]u8{0} ** 8, .value = 0, @@ -1351,33 +1381,38 @@ pub fn deleteExport(self: *Coff, exp: Export) void { }; self.locals_free_list.append(gpa, sym_index) catch {}; - if (self.globals.get(sym_name)) |global| blk: { - if (global.sym_index != sym_index) break :blk; - if (global.file != null) break :blk; - const kv = self.globals.fetchSwapRemove(sym_name); - gpa.free(kv.?.key); + if (self.resolver.fetchRemove(sym_name)) |entry| { + defer gpa.free(entry.key); + self.globals_free_list.append(gpa, entry.value) catch {}; + self.globals.items[entry.value] = .{ + .sym_index = 0, + .file = null, + }; } } fn resolveGlobalSymbol(self: *Coff, current: SymbolWithLoc) !void { const gpa = self.base.allocator; const sym = self.getSymbol(current); - _ = sym; const sym_name = self.getSymbolName(current); - const name = try gpa.dupe(u8, sym_name); - const global_index = @intCast(u32, self.globals.values().len); - _ = global_index; - const gop = try self.globals.getOrPut(gpa, name); - defer if (gop.found_existing) gpa.free(name); - - if (!gop.found_existing) { - gop.value_ptr.* = current; - // TODO undef + tentative + const global_index = self.resolver.get(sym_name) orelse { + const name = try gpa.dupe(u8, sym_name); + const global_index = try self.allocateGlobal(); + self.globals.items[global_index] = current; + try self.resolver.putNoClobber(gpa, name, global_index); + if (sym.section_number == .UNDEFINED) { + try self.unresolved.putNoClobber(gpa, global_index, false); + } return; - } + }; log.debug("TODO finish resolveGlobalSymbols implementation", .{}); + + if (sym.section_number == .UNDEFINED) return; + + _ = self.unresolved.swapRemove(global_index); + self.globals.items[global_index] = current; } pub fn flush(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Node) !void { @@ -1415,7 +1450,7 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod while (self.unresolved.popOrNull()) |entry| { assert(entry.value); // We only expect imports generated by the incremental linker for now. 
- const global = self.globals.values()[entry.key]; + const global = self.globals.items[entry.key]; if (self.imports_table.contains(global)) continue; _ = try self.allocateImportEntry(global); @@ -1481,24 +1516,22 @@ pub fn getDeclVAddr( } pub fn getGlobalSymbol(self: *Coff, name: []const u8) !u32 { - const gpa = self.base.allocator; - const sym_name = try gpa.dupe(u8, name); - const global_index = @intCast(u32, self.globals.values().len); - const gop = try self.globals.getOrPut(gpa, sym_name); - defer if (gop.found_existing) gpa.free(sym_name); - - if (gop.found_existing) { - // TODO audit this: can we ever reference anything from outside the Zig module? - assert(gop.value_ptr.file == null); - return gop.value_ptr.sym_index; + if (self.resolver.get(name)) |global_index| { + return self.globals.items[global_index].sym_index; } + const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); + const global_index = try self.allocateGlobal(); const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + self.globals.items[global_index] = sym_loc; + + const sym_name = try gpa.dupe(u8, name); const sym = self.getSymbolPtr(sym_loc); try self.setSymbolName(sym, sym_name); sym.storage_class = .EXTERNAL; - gop.value_ptr.* = sym_loc; + + try self.resolver.putNoClobber(gpa, sym_name, global_index); try self.unresolved.putNoClobber(gpa, global_index, true); return sym_index; @@ -1607,14 +1640,15 @@ fn writeBaseRelocations(self: *Coff) !void { } fn writeImportTable(self: *Coff) !void { + if (self.idata_section_index == null) return; + const gpa = self.base.allocator; const section = self.sections.get(self.idata_section_index.?); + const last_atom = section.last_atom orelse return; + const iat_rva = section.header.virtual_address; - const iat_size = blk: { - const last_atom = section.last_atom.?; - break :blk last_atom.getSymbol(self).value + last_atom.size * 2 - iat_rva; // account for sentinel zero pointer - }; + const iat_size = last_atom.getSymbol(self).value + last_atom.size * 2 - iat_rva; // account for sentinel zero pointer const dll_name = "KERNEL32.dll"; @@ -1975,7 +2009,8 @@ inline fn getSizeOfImage(self: Coff) u32 { /// Returns symbol location corresponding to the set entrypoint (if any). pub fn getEntryPoint(self: Coff) ?SymbolWithLoc { const entry_name = self.base.options.entry orelse "wWinMainCRTStartup"; // TODO this is incomplete - return self.globals.get(entry_name); + const global_index = self.resolver.get(entry_name) orelse return null; + return self.globals.items[global_index]; } /// Returns pointer-to-symbol described by `sym_with_loc` descriptor. 
@@ -2100,9 +2135,9 @@ fn logSymtab(self: *Coff) void { } log.debug("globals table:", .{}); - for (self.globals.keys()) |name, id| { - const value = self.globals.values()[id]; - log.debug(" {s} => %{d} in object({?d})", .{ name, value.sym_index, value.file }); + for (self.globals.items) |sym_loc| { + const sym_name = self.getSymbolName(sym_loc); + log.debug(" {s} => %{d} in object({?d})", .{ sym_name, sym_loc.sym_index, sym_loc.file }); } log.debug("GOT entries:", .{}); -- cgit v1.2.3 From 215fce8c51662970d34ae1f4bf1cd043071fea8a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 12:46:51 +0200 Subject: coff: fix tracking of got and import entries; free relocs in update* fns --- src/link/Coff.zig | 132 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 51 deletions(-) (limited to 'src') diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 2abfc78d1e..49263df225 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -63,11 +63,13 @@ globals_free_list: std.ArrayListUnmanaged(u32) = .{}, strtab: StringTable(.strtab) = .{}, strtab_offset: ?u32 = null, -got_entries: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, +got_entries: std.ArrayListUnmanaged(Entry) = .{}, got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, +got_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, -imports_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{}, -imports_table_free_list: std.ArrayListUnmanaged(u32) = .{}, +imports: std.ArrayListUnmanaged(Entry) = .{}, +imports_free_list: std.ArrayListUnmanaged(u32) = .{}, +imports_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, /// Virtual address of the entry point procedure relative to image base. entry_addr: ?u32 = null, @@ -115,6 +117,12 @@ relocs: RelocTable = .{}, /// this will be a table indexed by index into the list of Atoms. base_relocs: BaseRelocationTable = .{}, +const Entry = struct { + target: SymbolWithLoc, + // Index into the synthetic symbol table (i.e., file == null). 
+ sym_index: u32, +}; + pub const Reloc = struct { @"type": enum { got, @@ -309,8 +317,10 @@ pub fn deinit(self: *Coff) void { self.strtab.deinit(gpa); self.got_entries.deinit(gpa); self.got_entries_free_list.deinit(gpa); + self.got_entries_table.deinit(gpa); + self.imports.deinit(gpa); + self.imports_free_list.deinit(gpa); self.imports_table.deinit(gpa); - self.imports_table_free_list.deinit(gpa); self.decls.deinit(gpa); self.atom_by_index_table.deinit(gpa); @@ -684,42 +694,44 @@ fn allocateGlobal(self: *Coff) !u32 { pub fn allocateGotEntry(self: *Coff, target: SymbolWithLoc) !u32 { const gpa = self.base.allocator; try self.got_entries.ensureUnusedCapacity(gpa, 1); + const index: u32 = blk: { if (self.got_entries_free_list.popOrNull()) |index| { log.debug(" (reusing GOT entry index {d})", .{index}); - if (self.got_entries.getIndex(target)) |existing| { - assert(existing == index); - } break :blk index; } else { - log.debug(" (allocating GOT entry at index {d})", .{self.got_entries.keys().len}); - const index = @intCast(u32, self.got_entries.keys().len); - self.got_entries.putAssumeCapacityNoClobber(target, 0); + log.debug(" (allocating GOT entry at index {d})", .{self.got_entries.items.len}); + const index = @intCast(u32, self.got_entries.items.len); + _ = self.got_entries.addOneAssumeCapacity(); break :blk index; } }; - self.got_entries.keys()[index] = target; + + self.got_entries.items[index] = .{ .target = target, .sym_index = 0 }; + try self.got_entries_table.putNoClobber(gpa, target, index); + return index; } pub fn allocateImportEntry(self: *Coff, target: SymbolWithLoc) !u32 { const gpa = self.base.allocator; - try self.imports_table.ensureUnusedCapacity(gpa, 1); + try self.imports.ensureUnusedCapacity(gpa, 1); + const index: u32 = blk: { - if (self.imports_table_free_list.popOrNull()) |index| { + if (self.imports_free_list.popOrNull()) |index| { log.debug(" (reusing import entry index {d})", .{index}); - if (self.imports_table.getIndex(target)) |existing| { - assert(existing == index); - } break :blk index; } else { - log.debug(" (allocating import entry at index {d})", .{self.imports_table.keys().len}); - const index = @intCast(u32, self.imports_table.keys().len); - self.imports_table.putAssumeCapacityNoClobber(target, 0); + log.debug(" (allocating import entry at index {d})", .{self.imports.items.len}); + const index = @intCast(u32, self.imports.items.len); + _ = self.imports.addOneAssumeCapacity(); break :blk index; } }; - self.imports_table.keys()[index] = target; + + self.imports.items[index] = .{ .target = target, .sym_index = 0 }; + try self.imports_table.putNoClobber(gpa, target, index); + return index; } @@ -734,7 +746,6 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); - self.got_entries.getPtr(target).?.* = atom.sym_index; const sym = atom.getSymbolPtr(self); sym.section_number = @intToEnum(coff.SectionNumber, self.got_section_index.? 
+ 1); @@ -762,7 +773,7 @@ fn createGotAtom(self: *Coff, target: SymbolWithLoc) !*Atom { return atom; } -fn createImportAtom(self: *Coff, target: SymbolWithLoc) !*Atom { +fn createImportAtom(self: *Coff) !*Atom { const gpa = self.base.allocator; const atom = try gpa.create(Atom); errdefer gpa.destroy(atom); @@ -773,7 +784,6 @@ fn createImportAtom(self: *Coff, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, atom.sym_index, atom); - self.imports_table.getPtr(target).?.* = atom.sym_index; const sym = atom.getSymbolPtr(self); sym.section_number = @intToEnum(coff.SectionNumber, self.idata_section_index.? + 1); @@ -804,7 +814,7 @@ fn writeAtom(self: *Coff, atom: *Atom, code: []const u8) !void { const sym = atom.getSymbol(self); const section = self.sections.get(@enumToInt(sym.section_number) - 1); const file_offset = section.header.pointer_to_raw_data + sym.value - section.header.virtual_address; - log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); + log.debug("writing atom for symbol {s} at file offset 0x{x} to 0x{x}", .{ atom.getName(self), file_offset, file_offset + code.len }); try self.base.file.?.pwriteAll(code, file_offset); try self.resolveRelocs(atom); } @@ -860,11 +870,12 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void { const target_vaddr = target_atom.getSymbol(self).value; const target_vaddr_with_addend = target_vaddr + reloc.addend; - log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{ + log.debug(" ({x}: [() => 0x{x} ({s})) ({s}) (in file at 0x{x})", .{ source_sym.value + reloc.offset, target_vaddr_with_addend, self.getSymbolName(reloc.target), @tagName(reloc.@"type"), + file_offset + reloc.offset, }); reloc.dirty = false; @@ -901,8 +912,7 @@ fn freeAtom(self: *Coff, atom: *Atom) void { log.debug("freeAtom {*}", .{atom}); // Remove any relocs and base relocs associated with this Atom - _ = self.relocs.remove(atom); - _ = self.base_relocs.remove(atom); + self.freeRelocationsForAtom(atom); const sym = atom.getSymbol(self); const sect_id = @enumToInt(sym.section_number) - 1; @@ -966,11 +976,14 @@ pub fn updateFunc(self: *Coff, module: *Module, func: *Module.Fn, air: Air, live const tracy = trace(@src()); defer tracy.end(); + const decl_index = func.owner_decl; + const decl = module.declPtr(decl_index); + self.freeUnnamedConsts(decl_index); + self.freeRelocationsForAtom(&decl.link.coff); + var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); - const decl_index = func.owner_decl; - const decl = module.declPtr(decl_index); const res = try codegen.generateFunction( &self.base, decl.srcLoc(), @@ -1082,6 +1095,8 @@ pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) ! 
} } + self.freeRelocationsForAtom(&decl.link.coff); + var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); @@ -1190,8 +1205,9 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, sym.value = vaddr; const got_target = SymbolWithLoc{ .sym_index = atom.sym_index, .file = null }; - _ = try self.allocateGotEntry(got_target); + const got_index = try self.allocateGotEntry(got_target); const got_atom = try self.createGotAtom(got_target); + self.got_entries.items[got_index].sym_index = got_atom.sym_index; try self.writePtrWidthAtom(got_atom); } @@ -1199,6 +1215,11 @@ fn updateDeclCode(self: *Coff, decl_index: Module.Decl.Index, code: []const u8, try self.writeAtom(atom, code); } +fn freeRelocationsForAtom(self: *Coff, atom: *Atom) void { + _ = self.relocs.remove(atom); + _ = self.base_relocs.remove(atom); +} + fn freeUnnamedConsts(self: *Coff, decl_index: Module.Decl.Index) void { const gpa = self.base.allocator; const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; @@ -1237,14 +1258,20 @@ pub fn freeDecl(self: *Coff, decl_index: Module.Decl.Index) void { // Try freeing GOT atom if this decl had one const got_target = SymbolWithLoc{ .sym_index = sym_index, .file = null }; - if (self.got_entries.getIndex(got_target)) |got_index| { + if (self.got_entries_table.get(got_target)) |got_index| { self.got_entries_free_list.append(gpa, @intCast(u32, got_index)) catch {}; - self.got_entries.values()[got_index] = 0; + self.got_entries.items[got_index] = .{ + .target = .{ .sym_index = 0, .file = null }, + .sym_index = 0, + }; + _ = self.got_entries_table.remove(got_target); + log.debug(" adding GOT index {d} to free list (target local@{d})", .{ got_index, sym_index }); } self.locals.items[sym_index].section_number = .UNDEFINED; _ = self.atom_by_index_table.remove(sym_index); + log.debug(" adding local symbol index {d} to free list", .{sym_index}); decl.link.coff.sym_index = 0; } } @@ -1453,8 +1480,9 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod const global = self.globals.items[entry.key]; if (self.imports_table.contains(global)) continue; - _ = try self.allocateImportEntry(global); - const import_atom = try self.createImportAtom(global); + const import_index = try self.allocateImportEntry(global); + const import_atom = try self.createImportAtom(); + self.imports.items[import_index].sym_index = import_atom.sym_index; try self.writePtrWidthAtom(import_atom); } @@ -1668,8 +1696,8 @@ fn writeImportTable(self: *Coff) !void { defer names_table.deinit(); // TODO: check if import is still valid - for (self.imports_table.keys()) |target| { - const target_name = self.getSymbolName(target); + for (self.imports.items) |entry| { + const target_name = self.getSymbolName(entry.target); const start = names_table.items.len; mem.writeIntLittle(u16, try names_table.addManyAsArray(2), 0); // TODO: currently, hint is set to 0 as we haven't yet parsed any DLL try names_table.appendSlice(target_name); @@ -2013,19 +2041,19 @@ pub fn getEntryPoint(self: Coff) ?SymbolWithLoc { return self.globals.items[global_index]; } -/// Returns pointer-to-symbol described by `sym_with_loc` descriptor. +/// Returns pointer-to-symbol described by `sym_loc` descriptor. pub fn getSymbolPtr(self: *Coff, sym_loc: SymbolWithLoc) *coff.Symbol { assert(sym_loc.file == null); // TODO linking object files return &self.locals.items[sym_loc.sym_index]; } -/// Returns symbol described by `sym_with_loc` descriptor. 
+/// Returns symbol described by `sym_loc` descriptor. pub fn getSymbol(self: *const Coff, sym_loc: SymbolWithLoc) *const coff.Symbol { assert(sym_loc.file == null); // TODO linking object files return &self.locals.items[sym_loc.sym_index]; } -/// Returns name of the symbol described by `sym_with_loc` descriptor. +/// Returns name of the symbol described by `sym_loc` descriptor. pub fn getSymbolName(self: *const Coff, sym_loc: SymbolWithLoc) []const u8 { assert(sym_loc.file == null); // TODO linking object files const sym = self.getSymbol(sym_loc); @@ -2033,25 +2061,27 @@ pub fn getSymbolName(self: *const Coff, sym_loc: SymbolWithLoc) []const u8 { return self.strtab.get(offset).?; } -/// Returns atom if there is an atom referenced by the symbol described by `sym_with_loc` descriptor. +/// Returns atom if there is an atom referenced by the symbol described by `sym_loc` descriptor. /// Returns null on failure. pub fn getAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { assert(sym_loc.file == null); // TODO linking with object files return self.atom_by_index_table.get(sym_loc.sym_index); } -/// Returns GOT atom that references `sym_with_loc` if one exists. +/// Returns GOT atom that references `sym_loc` if one exists. /// Returns null otherwise. pub fn getGotAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { - const got_index = self.got_entries.get(sym_loc) orelse return null; - return self.atom_by_index_table.get(got_index); + const got_index = self.got_entries_table.get(sym_loc) orelse return null; + const got_entry = self.got_entries.items[got_index]; + return self.getAtomForSymbol(.{ .sym_index = got_entry.sym_index, .file = null }); } -/// Returns import atom that references `sym_with_loc` if one exists. +/// Returns import atom that references `sym_loc` if one exists. /// Returns null otherwise. 
pub fn getImportAtomForSymbol(self: *Coff, sym_loc: SymbolWithLoc) ?*Atom { const imports_index = self.imports_table.get(sym_loc) orelse return null; - return self.atom_by_index_table.get(imports_index); + const imports_entry = self.imports.items[imports_index]; + return self.getAtomForSymbol(.{ .sym_index = imports_entry.sym_index, .file = null }); } fn setSectionName(self: *Coff, header: *coff.SectionHeader, name: []const u8) !void { @@ -2141,21 +2171,21 @@ fn logSymtab(self: *Coff) void { } log.debug("GOT entries:", .{}); - for (self.got_entries.keys()) |target, i| { - const got_sym = self.getSymbol(.{ .sym_index = self.got_entries.values()[i], .file = null }); - const target_sym = self.getSymbol(target); + for (self.got_entries.items) |entry, i| { + const got_sym = self.getSymbol(.{ .sym_index = entry.sym_index, .file = null }); + const target_sym = self.getSymbol(entry.target); if (target_sym.section_number == .UNDEFINED) { log.debug(" {d}@{x} => import('{s}')", .{ i, got_sym.value, - self.getSymbolName(target), + self.getSymbolName(entry.target), }); } else { log.debug(" {d}@{x} => local(%{d}) in object({?d}) {s}", .{ i, got_sym.value, - target.sym_index, - target.file, + entry.target.sym_index, + entry.target.file, logSymAttributes(target_sym, &buf), }); } -- cgit v1.2.3 From 8ef1c62f2efc4a43e8b38d3cacf4fd930add7f46 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 15:29:21 +0200 Subject: macho: properly close file handles owned by the linker in deinit() --- src/link/MachO.zig | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index af25441066..bceaa55d8a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3013,6 +3013,7 @@ pub fn deinit(self: *MachO) void { } if (self.d_sym) |*d_sym| { + d_sym.file.close(); d_sym.deinit(gpa); } @@ -3041,6 +3042,7 @@ pub fn deinit(self: *MachO) void { self.objects.deinit(gpa); for (self.archives.items) |*archive| { + archive.file.close(); archive.deinit(gpa); } self.archives.deinit(gpa); @@ -3086,15 +3088,6 @@ pub fn deinit(self: *MachO) void { self.atom_by_index_table.deinit(gpa); } -pub fn closeFiles(self: MachO) void { - for (self.archives.items) |archive| { - archive.file.close(); - } - if (self.d_sym) |ds| { - ds.file.close(); - } -} - fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void { log.debug("freeAtom {*}", .{atom}); if (!owns_atom) { -- cgit v1.2.3 From 639237c7b4655d7d4c38c24d9b36a145bbfb1e1c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 16:10:19 +0200 Subject: macho: set file instance in linkOneShot only if not already set --- src/link/MachO.zig | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index bceaa55d8a..e1392d8903 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -793,11 +793,13 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) } } else { const sub_path = self.base.options.emit.?.sub_path; - self.base.file = try directory.handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.determineMode(self.base.options), - }); + if (self.base.file == null) { + self.base.file = try directory.handle.createFile(sub_path, .{ + .truncate = true, + .read = true, + .mode = link.determineMode(self.base.options), + }); + } // Index 0 is always a null symbol. 
try self.locals.append(gpa, .{ .n_strx = 0, -- cgit v1.2.3 From 678e07b924a717e79f412b78895ead1136a722bc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 19:16:30 +0200 Subject: macho+wasm: unify and clean up closing file handles --- src/link/MachO.zig | 3 --- src/link/MachO/Archive.zig | 1 + src/link/MachO/DebugSymbols.zig | 1 + src/link/Wasm.zig | 2 -- src/link/Wasm/Archive.zig | 1 + src/link/Wasm/Object.zig | 3 +++ 6 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e1392d8903..ef180ab032 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1437,7 +1437,6 @@ fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { if (force_load) { defer archive.deinit(gpa); - defer file.close(); // Get all offsets from the ToC var offsets = std.AutoArrayHashMap(u32, void).init(gpa); defer offsets.deinit(); @@ -3015,7 +3014,6 @@ pub fn deinit(self: *MachO) void { } if (self.d_sym) |*d_sym| { - d_sym.file.close(); d_sym.deinit(gpa); } @@ -3044,7 +3042,6 @@ pub fn deinit(self: *MachO) void { self.objects.deinit(gpa); for (self.archives.items) |*archive| { - archive.file.close(); archive.deinit(gpa); } self.archives.deinit(gpa); diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 054f75fff3..59a956534e 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -88,6 +88,7 @@ const ar_hdr = extern struct { }; pub fn deinit(self: *Archive, allocator: Allocator) void { + self.file.close(); for (self.toc.keys()) |*key| { allocator.free(key.*); } diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index a7dc6391c2..ffff0fe5f8 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -306,6 +306,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti } pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { + self.file.close(); self.segments.deinit(allocator); self.sections.deinit(allocator); self.dwarf.deinit(); diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 050d9287a5..8c73336e9f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -648,12 +648,10 @@ pub fn deinit(self: *Wasm) void { gpa.free(segment_info.name); } for (self.objects.items) |*object| { - object.file.?.close(); object.deinit(gpa); } for (self.archives.items) |*archive| { - archive.file.close(); archive.deinit(gpa); } diff --git a/src/link/Wasm/Archive.zig b/src/link/Wasm/Archive.zig index c80d26d17d..b1cce15b1d 100644 --- a/src/link/Wasm/Archive.zig +++ b/src/link/Wasm/Archive.zig @@ -95,6 +95,7 @@ const ar_hdr = extern struct { }; pub fn deinit(archive: *Archive, allocator: Allocator) void { + archive.file.close(); for (archive.toc.keys()) |*key| { allocator.free(key.*); } diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig index 50827ca9fb..b182fdfcae 100644 --- a/src/link/Wasm/Object.zig +++ b/src/link/Wasm/Object.zig @@ -141,6 +141,9 @@ pub fn create(gpa: Allocator, file: std.fs.File, name: []const u8, maybe_max_siz /// Frees all memory of `Object` at once. The given `Allocator` must be /// the same allocator that was used when `init` was called. 
pub fn deinit(self: *Object, gpa: Allocator) void { + if (self.file) |file| { + file.close(); + } for (self.func_types) |func_ty| { gpa.free(func_ty.params); gpa.free(func_ty.returns); -- cgit v1.2.3 From a226aef36cc441be4af675e24b3e672fe3fe2d5a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 19:16:54 +0200 Subject: test-cases: enable stage2 tests on Windows --- ci/azure/pipelines.yml | 3 +-- src/test.zig | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/ci/azure/pipelines.yml b/ci/azure/pipelines.yml index 633c6389d0..86f92f5ef6 100644 --- a/ci/azure/pipelines.yml +++ b/ci/azure/pipelines.yml @@ -73,8 +73,7 @@ jobs: & "$ZIGINSTALLDIR\bin\zig.exe" build test docs ` --search-prefix "$ZIGPREFIXPATH" ` -Dstatic-llvm ` - -Dskip-non-native ` - -Dskip-stage2-tests + -Dskip-non-native CheckLastExitCode name: test displayName: 'Test' diff --git a/src/test.zig b/src/test.zig index babded13f9..dfe5f5c866 100644 --- a/src/test.zig +++ b/src/test.zig @@ -177,6 +177,8 @@ const TestManifestConfigDefaults = struct { inline for (&[_][]const u8{ "x86_64", "aarch64" }) |arch| { defaults = defaults ++ arch ++ "-macos" ++ ","; } + // Windows + defaults = defaults ++ "x86_64-windows" ++ ","; // Wasm defaults = defaults ++ "wasm32-wasi"; return defaults; @@ -1546,6 +1548,13 @@ pub const TestContext = struct { .self_exe_path = std.testing.zig_exe_path, // TODO instead of turning off color, pass in a std.Progress.Node .color = .off, + // TODO: We set these to no so that we don't fallback to LLD for incremental linking context. This is because + // our own COFF linker doesn't yet support these options. + .want_unwind_tables = switch (case.backend) { + .stage2 => if (target.os.tag == .windows) false else null, + else => null, + }, + .emit_implib = null, }); defer comp.destroy(); -- cgit v1.2.3 From 0e152b76ac0da0f8132091202eba9f6cebd0e616 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Sep 2022 23:16:42 +0200 Subject: tests: force LLD off for stage2 backends until auto-select deems worthy --- src/test.zig | 9 ++++----- test/tests.zig | 9 ++------- 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/test.zig b/src/test.zig index dfe5f5c866..e824b59066 100644 --- a/src/test.zig +++ b/src/test.zig @@ -1548,13 +1548,12 @@ pub const TestContext = struct { .self_exe_path = std.testing.zig_exe_path, // TODO instead of turning off color, pass in a std.Progress.Node .color = .off, - // TODO: We set these to no so that we don't fallback to LLD for incremental linking context. This is because - // our own COFF linker doesn't yet support these options. - .want_unwind_tables = switch (case.backend) { - .stage2 => if (target.os.tag == .windows) false else null, + // TODO: force self-hosted linkers with stage2 backend to avoid LLD creeping in + // until the auto-select mechanism deems them worthy + .use_lld = switch (case.backend) { + .stage2 => false, else => null, }, - .emit_implib = null, }); defer comp.destroy(); diff --git a/test/tests.zig b/test/tests.zig index f46a3acbb5..53e58156a4 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -701,13 +701,8 @@ pub fn addPkgTests( else => { these_tests.use_stage1 = false; these_tests.use_llvm = false; - - if (test_target.target.getOsTag() == .windows) { - // TODO: We set these to no so that we don't fallback to LLD for incremental linking context. This is because - // our own COFF linker doesn't yet support these options. 
- these_tests.emit_implib = .no_emit; - these_tests.use_unwind_tables = false; - } + // TODO: force self-hosted linkers to avoid LLD creeping in until the auto-select mechanism deems them worthy + these_tests.use_lld = false; }, }; -- cgit v1.2.3 From 0ae2ea671b867e5ecd0bc779405c175f33316559 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 8 Sep 2022 14:29:51 +0200 Subject: wasm: temporarily save curr file pointer before pwriting on Win This is a temporary workaround to an unclear platform-dependence behavior we have in libstd for `std.fs.File` abstraction. See https://github.com/ziglang/zig/issues/12783 for more information. --- lib/std/fs/file.zig | 16 ++++++++++++++++ src/link/Wasm.zig | 16 ++++++++++++++-- test/link.zig | 14 +++++++------- 3 files changed, 37 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index 5de746150b..3792e1c1f2 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -990,6 +990,8 @@ pub const File = struct { return index; } + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pread(self: File, buffer: []u8, offset: u64) PReadError!usize { if (is_windows) { return windows.ReadFile(self.handle, buffer, offset, self.intended_io_mode); @@ -1004,6 +1006,8 @@ pub const File = struct { /// Returns the number of bytes read. If the number read is smaller than `buffer.len`, it /// means the file reached the end. Reaching the end of a file is not an error condition. + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn preadAll(self: File, buffer: []u8, offset: u64) PReadError!usize { var index: usize = 0; while (index != buffer.len) { @@ -1058,6 +1062,8 @@ pub const File = struct { } /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn preadv(self: File, iovecs: []const os.iovec, offset: u64) PReadError!usize { if (is_windows) { // TODO improve this to use ReadFileScatter @@ -1079,6 +1085,8 @@ pub const File = struct { /// The `iovecs` parameter is mutable because this function needs to mutate the fields in /// order to handle partial reads from the underlying OS layer. /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn preadvAll(self: File, iovecs: []os.iovec, offset: u64) PReadError!usize { if (iovecs.len == 0) return 0; @@ -1122,6 +1130,8 @@ pub const File = struct { } } + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwrite(self: File, bytes: []const u8, offset: u64) PWriteError!usize { if (is_windows) { return windows.WriteFile(self.handle, bytes, offset, self.intended_io_mode); @@ -1134,6 +1144,8 @@ pub const File = struct { } } + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwriteAll(self: File, bytes: []const u8, offset: u64) PWriteError!void { var index: usize = 0; while (index < bytes.len) { @@ -1179,6 +1191,8 @@ pub const File = struct { } /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. 
+ /// https://github.com/ziglang/zig/issues/12783 pub fn pwritev(self: File, iovecs: []os.iovec_const, offset: u64) PWriteError!usize { if (is_windows) { // TODO improve this to use WriteFileScatter @@ -1197,6 +1211,8 @@ pub const File = struct { /// The `iovecs` parameter is mutable because this function needs to mutate the fields in /// order to handle partial writes from the underlying OS layer. /// See https://github.com/ziglang/zig/issues/7699 + /// On Windows, this function currently does alter the file pointer. + /// https://github.com/ziglang/zig/issues/12783 pub fn pwritevAll(self: File, iovecs: []os.iovec_const, offset: u64) PWriteError!void { if (iovecs.len == 0) return; diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 8c73336e9f..f0f50049b8 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -3055,14 +3055,26 @@ fn writeVecSectionHeader(file: fs.File, offset: u64, section: wasm.Section, size buf[0] = @enumToInt(section); leb.writeUnsignedFixed(5, buf[1..6], size); leb.writeUnsignedFixed(5, buf[6..], items); - try file.pwriteAll(&buf, offset); + + if (builtin.target.os.tag == .windows) { + // https://github.com/ziglang/zig/issues/12783 + const curr_pos = try file.getPos(); + try file.pwriteAll(&buf, offset); + try file.seekTo(curr_pos); + } else try file.pwriteAll(&buf, offset); } fn writeCustomSectionHeader(file: fs.File, offset: u64, size: u32) !void { var buf: [1 + 5]u8 = undefined; buf[0] = 0; // 0 = 'custom' section leb.writeUnsignedFixed(5, buf[1..6], size); - try file.pwriteAll(&buf, offset); + + if (builtin.target.os.tag == .windows) { + // https://github.com/ziglang/zig/issues/12783 + const curr_pos = try file.getPos(); + try file.pwriteAll(&buf, offset); + try file.seekTo(curr_pos); + } else try file.pwriteAll(&buf, offset); } fn emitLinkSection(self: *Wasm, file: fs.File, arena: Allocator, symbol_table: *std.AutoArrayHashMap(SymbolLoc, u32)) !void { diff --git a/test/link.zig b/test/link.zig index b68353122c..d1dcbbc292 100644 --- a/test/link.zig +++ b/test/link.zig @@ -28,35 +28,35 @@ pub fn addCases(cases: *tests.StandaloneContext) void { } fn addWasmCases(cases: *tests.StandaloneContext) void { - cases.addBuildFile("test/link/wasm/bss/build.zig", .{ + cases.addBuildFile("test/link/wasm/archive/build.zig", .{ .build_modes = true, .requires_stage2 = true, }); - cases.addBuildFile("test/link/wasm/segments/build.zig", .{ + cases.addBuildFile("test/link/wasm/bss/build.zig", .{ .build_modes = true, .requires_stage2 = true, }); - cases.addBuildFile("test/link/wasm/stack_pointer/build.zig", .{ + cases.addBuildFile("test/link/wasm/extern/build.zig", .{ .build_modes = true, .requires_stage2 = true, + .use_emulation = true, }); - cases.addBuildFile("test/link/wasm/type/build.zig", .{ + cases.addBuildFile("test/link/wasm/segments/build.zig", .{ .build_modes = true, .requires_stage2 = true, }); - cases.addBuildFile("test/link/wasm/archive/build.zig", .{ + cases.addBuildFile("test/link/wasm/stack_pointer/build.zig", .{ .build_modes = true, .requires_stage2 = true, }); - cases.addBuildFile("test/link/wasm/extern/build.zig", .{ + cases.addBuildFile("test/link/wasm/type/build.zig", .{ .build_modes = true, .requires_stage2 = true, - .use_emulation = true, }); } -- cgit v1.2.3 From 8378cde74369ddb1cc618d444970e963a4ab1110 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 9 Sep 2022 00:01:20 +0200 Subject: macho: prefill any space between __DATA and __LINKEDIT with 0s if required If there are zerofill sections, the loader may copy the contents of the physical space 
in file directly into memory and attach that to the zerofill section. This is a performance optimisation in the loader but requires us, the linker, to properly zero-out any space between __DATA and __LINKEDIT segments in file. This is of course completely skipped if there are no zerofill sections present. --- src/link/MachO.zig | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ef180ab032..1ab0202b44 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1157,6 +1157,28 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) var ncmds: u32 = 0; try self.writeLinkeditSegmentData(&ncmds, lc_writer); + + // If the last section of __DATA segment is zerofill section, we need to ensure + // that the free space between the end of the last non-zerofill section of __DATA + // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will + // copy-paste this space into memory for quicker zerofill operation. + if (self.data_segment_cmd_index) |data_seg_id| blk: { + var physical_zerofill_start: u64 = 0; + const section_indexes = self.getSectionIndexes(data_seg_id); + for (self.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { + if (header.isZerofill() and header.size > 0) break; + physical_zerofill_start = header.offset + header.size; + } else break :blk; + const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; + const physical_zerofill_size = linkedit.fileoff - physical_zerofill_start; + if (physical_zerofill_size > 0) { + var padding = try self.base.allocator.alloc(u8, physical_zerofill_size); + defer self.base.allocator.free(padding); + mem.set(u8, padding, 0); + try self.base.file.?.pwriteAll(padding, physical_zerofill_start); + } + } + try writeDylinkerLC(&ncmds, lc_writer); try self.writeMainLC(&ncmds, lc_writer); try self.writeDylibIdLC(&ncmds, lc_writer); @@ -5690,8 +5712,10 @@ fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { else => unreachable, } - if (self.getSectionByName("__DATA", "__thread_vars")) |_| { - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + if (self.getSectionByName("__DATA", "__thread_vars")) |sect_id| { + if (self.sections.items(.header)[sect_id].size > 0) { + header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + } } header.ncmds = ncmds; -- cgit v1.2.3 From 5006fb6846ccaa7edb1547588cf1aa08c8decf2b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 9 Sep 2022 08:30:27 +0200 Subject: macho: fix compilation for 32bit targets --- src/link/MachO.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 1ab0202b44..429bf64eb2 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1170,7 +1170,8 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) physical_zerofill_start = header.offset + header.size; } else break :blk; const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; - const physical_zerofill_size = linkedit.fileoff - physical_zerofill_start; + const physical_zerofill_size = math.cast(usize, linkedit.fileoff - physical_zerofill_start) orelse + return error.Overflow; if (physical_zerofill_size > 0) { var padding = try self.base.allocator.alloc(u8, physical_zerofill_size); defer self.base.allocator.free(padding); -- cgit v1.2.3
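
A note on the Windows pwrite workaround above: because std.fs.File.pwriteAll currently moves the file cursor on Windows (https://github.com/ziglang/zig/issues/12783), the Wasm linker now brackets each positional header write with a getPos/seekTo pair. Below is a minimal sketch of that pattern factored into a standalone helper; the helper name is hypothetical and not part of the patch.

const std = @import("std");
const builtin = @import("builtin");

/// Hypothetical helper: positional write that leaves the file cursor where it
/// was, working around pwrite advancing the cursor on Windows (issue #12783).
fn pwriteAllKeepPos(file: std.fs.File, bytes: []const u8, offset: u64) !void {
    if (builtin.target.os.tag == .windows) {
        const saved_pos = try file.getPos(); // remember the current cursor
        try file.pwriteAll(bytes, offset); // the positional write moves the cursor here
        try file.seekTo(saved_pos); // restore it so later sequential writes land correctly
    } else {
        try file.pwriteAll(bytes, offset); // other hosts leave the cursor untouched
    }
}

Calling such a helper from writeVecSectionHeader and writeCustomSectionHeader would collapse the duplicated if/else into one place until the underlying platform behavior is settled.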
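
A note on the __DATA/__LINKEDIT gap handling in linkOneShot above: if, for illustration, the last non-zerofill section of __DATA ends at file offset 0x4220 and __LINKEDIT starts at fileoff 0x8000 (made-up numbers), the linker must write 0x8000 - 0x4220 = 0x3de0 zero bytes at offset 0x4220, since the loader may map that file range straight over the zerofill region. A minimal sketch of that step as a standalone helper, assuming only std.fs.File and an allocator; the function name and signature are hypothetical:

const std = @import("std");

/// Hypothetical helper: zero the file range [start, end), where `start` is the
/// end of the last non-zerofill __DATA section in the file and `end` is the
/// fileoff of __LINKEDIT.
fn zeroFileRange(gpa: std.mem.Allocator, file: std.fs.File, start: u64, end: u64) !void {
    const len = std.math.cast(usize, end - start) orelse return error.Overflow;
    if (len == 0) return; // segments are adjacent; nothing to zero
    const padding = try gpa.alloc(u8, len);
    defer gpa.free(padding);
    std.mem.set(u8, padding, 0); // this toolchain vintage spells memset as mem.set
    try file.pwriteAll(padding, start);
}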
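
A note on the GOT/import bookkeeping reworked above: getGotAtomForSymbol and getImportAtomForSymbol now resolve a target in two hops. A hash table maps the target symbol to an index into a flat entry list, and the entry records the synthetic atom's own symbol index; this lets freeDecl recycle slots through got_entries_free_list while indices stay stable. A condensed sketch of that shape with hypothetical, simplified types (the real linker keeps considerably more state):

const std = @import("std");

// Hypothetical, condensed declarations; not the linker's actual types.
const SymbolWithLoc = struct { sym_index: u32, file: ?u32 = null };

const Entry = struct {
    target: SymbolWithLoc, // the symbol this GOT/import cell ultimately points at
    sym_index: u32, // local symbol backing the synthetic pointer atom
};

const Tables = struct {
    got_entries: std.ArrayListUnmanaged(Entry) = .{},
    got_entries_table: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{},

    // Mirrors the lookup chain in getGotAtomForSymbol: target -> table index ->
    // entry -> backing symbol (the final atom lookup is elided here).
    fn gotSymIndexForTarget(self: *const Tables, target: SymbolWithLoc) ?u32 {
        const index = self.got_entries_table.get(target) orelse return null;
        return self.got_entries.items[index].sym_index;
    }
};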