From 9eb400ef19391261a3b61129d8665602c89959c5 Mon Sep 17 00:00:00 2001
From: mlugg <mlugg@mlugg.co.uk>
Date: Thu, 29 May 2025 05:38:55 +0100
Subject: compiler: rework backend pipeline to separate codegen and link

The idea here is that, rather than the linker calling into codegen,
codegen should run before we touch the linker, and after MIR is
produced, it is sent to the linker. Aside from simplifying the call
graph (by preventing N linkers from each calling into M codegen
backends!), this has the huge benefit that it is possible to
parallelize codegen separately from linking. The threading model can
look like this:

* 1 semantic analysis thread, which generates AIR
* N codegen threads, which process AIR into MIR
* 1 linker thread, which emits MIR to the binary

The codegen threads are also responsible for `Air.Legalize` and
`Air.Liveness`; it's more efficient to do this work here instead of
blocking the main thread for this trivially parallel task.

I have repurposed the `Zcu.Feature.separate_thread` backend feature to
indicate support for this 1:N:1 threading pattern. This commit makes the
C backend support this feature, since it was relatively easy to divorce
from `link.C`: it just required eliminating some shared buffers. Other
backends don't currently support this feature. In fact, they don't even
compile -- the next few commits will fix them back up.
---
 src/link/Queue.zig | 234 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 src/link/Queue.zig

(limited to 'src/link/Queue.zig')

diff --git a/src/link/Queue.zig b/src/link/Queue.zig
new file mode 100644
index 0000000000..c73a0e9684
--- /dev/null
+++ b/src/link/Queue.zig
@@ -0,0 +1,234 @@
+//! Stores and manages the queue of link tasks. Each task is either a `PrelinkTask` or a `ZcuTask`.
+//!
+//! There must be at most one link thread (the thread processing these tasks) active at a time. If
+//! `!comp.separateCodegenThreadOk()`, then ZCU tasks will be run on the main thread, bypassing this
+//! queue entirely.
+//!
+//! All prelink tasks must be processed before any ZCU tasks are processed. After all prelink tasks
+//! are run, but before any ZCU tasks are run, `prelink` must be called on the `link.File`.
+//!
+//! There will sometimes be a `ZcuTask` in the queue which is not yet ready because it depends on
+//! MIR which has not yet been generated by any codegen thread. In this case, we must pause
+//! processing of linker tasks until the MIR is ready. It would be incorrect to run any other link
+//! tasks first, since this would make builds unreproducible.
+
+mutex: std.Thread.Mutex,
+/// Validates that only one `flushTaskQueue` thread is running at a time.
+flush_safety: std.debug.SafetyLock,
+
+/// This is the number of prelink tasks which are expected but have not yet been enqueued.
+/// Guarded by `mutex`.
+pending_prelink_tasks: u32,
+
+/// Prelink tasks which have been enqueued and are not yet owned by the worker thread.
+/// Allocated into `gpa`, guarded by `mutex`.
+queued_prelink: std.ArrayListUnmanaged(PrelinkTask),
+/// The worker thread moves items from `queued_prelink` into this array in order to process them.
+/// Allocated into `gpa`, accessed only by the worker thread.
+wip_prelink: std.ArrayListUnmanaged(PrelinkTask),
+
+/// Like `queued_prelink`, but for ZCU tasks.
+/// Allocated into `gpa`, guarded by `mutex`.
+queued_zcu: std.ArrayListUnmanaged(ZcuTask),
+/// Like `wip_prelink`, but for ZCU tasks.
+/// Allocated into `gpa`, accessed only by the worker thread.
+wip_zcu: std.ArrayListUnmanaged(ZcuTask),
+
+/// When processing ZCU link tasks, we might have to block due to unpopulated MIR. When this
+/// happens, some tasks in `wip_zcu` have been run, and some are still pending. This is the
+/// index into `wip_zcu` which we have reached.
+wip_zcu_idx: usize,
+
+/// Guarded by `mutex`.
+state: union(enum) {
+    /// The link thread is currently running or queued to run.
+    running,
+    /// The link thread is not running or queued, because it has exhausted all immediately available
+    /// tasks. It should be spawned when more tasks are enqueued. If `pending_prelink_tasks` is not
+    /// zero, we are specifically waiting for prelink tasks.
+    finished,
+    /// The link thread is not running or queued, because it is waiting for this MIR to be populated.
+    /// Once codegen completes, it must call `mirReady` which will restart the link thread.
+    wait_for_mir: *ZcuTask.LinkFunc.SharedMir,
+},
+
+/// The initial `Queue` state, containing no tasks, expecting no prelink tasks, and with no running worker thread.
+/// The `pending_prelink_tasks` and `queued_prelink` fields may be modified as needed before calling `start`.
+pub const empty: Queue = .{
+    .mutex = .{},
+    .flush_safety = .{},
+    .pending_prelink_tasks = 0,
+    .queued_prelink = .empty,
+    .wip_prelink = .empty,
+    .queued_zcu = .empty,
+    .wip_zcu = .empty,
+    .wip_zcu_idx = 0,
+    .state = .finished,
+};
+/// `comp` is needed to correctly deinit any pending `ZcuTask`s (via `comp.zcu`) and to free the lists (via `comp.gpa`).
+pub fn deinit(q: *Queue, comp: *Compilation) void {
+    const gpa = comp.gpa;
+    for (q.queued_zcu.items) |t| t.deinit(comp.zcu.?);
+    for (q.wip_zcu.items[q.wip_zcu_idx..]) |t| t.deinit(comp.zcu.?); // entries before `wip_zcu_idx` were already deinited after being run
+    q.queued_prelink.deinit(gpa);
+    q.wip_prelink.deinit(gpa);
+    q.queued_zcu.deinit(gpa);
+    q.wip_zcu.deinit(gpa);
+}
+
+/// This is expected to be called exactly once, after which the caller must not directly access
+/// `queued_prelink` or `pending_prelink_tasks` any longer. This will spawn the link thread if
+/// necessary, i.e. if some prelink tasks were already placed in `queued_prelink`.
+pub fn start(q: *Queue, comp: *Compilation) void {
+    assert(q.state == .finished);
+    assert(q.queued_zcu.items.len == 0);
+    if (q.queued_prelink.items.len != 0) { // otherwise stay `.finished`; the thread gets spawned later, once tasks are enqueued
+        q.state = .running;
+        comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
+    }
+}
+
+/// Called by codegen workers after they have populated a `ZcuTask.LinkFunc.SharedMir`. If the link
+/// thread was waiting for this exact MIR (`state == .wait_for_mir`), it is respawned; otherwise this is a no-op.
+pub fn mirReady(q: *Queue, comp: *Compilation, mir: *ZcuTask.LinkFunc.SharedMir) void {
+    // We would like to assert here that `mir` is not pending, but that would race with the link
+    // worker thread potentially freeing it, so the assertion is deferred until after the state check.
+    {
+        q.mutex.lock();
+        defer q.mutex.unlock();
+        switch (q.state) {
+            .finished => unreachable, // there's definitely a task queued
+            .running => return,
+            .wait_for_mir => |wait_for| if (wait_for != mir) return,
+        }
+        // We were waiting for `mir`, so we will restart the linker thread.
+        q.state = .running;
+    }
+    assert(mir.status.load(.monotonic) != .pending); // the link thread was parked waiting on `mir`, so it is not concurrently freeing it
+    comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
+}
+
+/// Enqueues all prelink tasks in `tasks`. Asserts that they were expected, i.e. that `tasks.len` is
+/// less than or equal to `q.pending_prelink_tasks` (through the subtraction below). `tasks` should not be empty; NOTE(review): that is not explicitly asserted here.
+pub fn enqueuePrelink(q: *Queue, comp: *Compilation, tasks: []const PrelinkTask) Allocator.Error!void {
+    {
+        q.mutex.lock();
+        defer q.mutex.unlock();
+        try q.queued_prelink.appendSlice(comp.gpa, tasks);
+        q.pending_prelink_tasks -= @intCast(tasks.len);
+        switch (q.state) {
+            .wait_for_mir => unreachable, // we've not started zcu tasks yet
+            .running => return, // the running link thread re-checks `queued_prelink` under `mutex` before it stops
+            .finished => {},
+        }
+        // Restart the linker thread, because it was waiting for a task
+        q.state = .running;
+    }
+    comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
+}
+
+pub fn enqueueZcu(q: *Queue, comp: *Compilation, task: ZcuTask) Allocator.Error!void {
+    assert(comp.separateCodegenThreadOk()); // otherwise ZCU tasks are run on the main thread, bypassing this queue
+    {
+        q.mutex.lock();
+        defer q.mutex.unlock();
+        try q.queued_zcu.append(comp.gpa, task);
+        switch (q.state) {
+            .running, .wait_for_mir => return, // the link thread is active, or will be respawned by `mirReady`
+            .finished => if (q.pending_prelink_tasks != 0) return, // still waiting on prelink tasks; `enqueuePrelink` respawns the thread
+        }
+        // Restart the linker thread, unless it would immediately be blocked
+        if (task == .link_func and task.link_func.mir.status.load(.monotonic) == .pending) {
+            q.state = .{ .wait_for_mir = task.link_func.mir }; // `mirReady` spawns the thread once this MIR is populated
+            return;
+        }
+        q.state = .running;
+    }
+    comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
+}
+
+fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
+    q.flush_safety.lock(); // validates that only one `flushTaskQueue` is running at a time
+    defer q.flush_safety.unlock();
+
+    if (std.debug.runtime_safety) {
+        q.mutex.lock();
+        defer q.mutex.unlock();
+        assert(q.state == .running); // every spawner sets `.running` under `mutex` before spawning us
+    }
+    prelink: while (true) {
+        assert(q.wip_prelink.items.len == 0);
+        {
+            q.mutex.lock();
+            defer q.mutex.unlock();
+            std.mem.swap(std.ArrayListUnmanaged(PrelinkTask), &q.queued_prelink, &q.wip_prelink);
+            if (q.wip_prelink.items.len == 0) {
+                if (q.pending_prelink_tasks == 0) {
+                    break :prelink; // prelink is done
+                } else {
+                    // We're expecting more prelink tasks so can't move on to ZCU tasks.
+                    q.state = .finished;
+                    return;
+                }
+            }
+        }
+        for (q.wip_prelink.items) |task| {
+            link.doPrelinkTask(comp, task);
+        }
+        q.wip_prelink.clearRetainingCapacity();
+    }
+
+    // We've finished the prelink tasks, so run prelink if necessary (at most once: `post_prelink` records success).
+    if (comp.bin_file) |lf| {
+        if (!lf.post_prelink) {
+            if (lf.prelink(comp.work_queue_progress_node)) |_| {
+                lf.post_prelink = true;
+            } else |err| switch (err) {
+                error.OutOfMemory => comp.link_diags.setAllocFailure(),
+                error.LinkFailure => {},
+            }
+        }
+    }
+
+    // Now we can run ZCU tasks, resuming at `wip_zcu_idx` if a previous run stopped to wait for MIR.
+    while (true) {
+        if (q.wip_zcu.items.len == q.wip_zcu_idx) {
+            q.wip_zcu.clearRetainingCapacity();
+            q.wip_zcu_idx = 0;
+            q.mutex.lock();
+            defer q.mutex.unlock();
+            std.mem.swap(std.ArrayListUnmanaged(ZcuTask), &q.queued_zcu, &q.wip_zcu);
+            if (q.wip_zcu.items.len == 0) {
+                // We've exhausted all available tasks.
+                q.state = .finished;
+                return;
+            }
+        }
+        const task = q.wip_zcu.items[q.wip_zcu_idx];
+        // If the task is a `link_func`, we might have to stop until its MIR is populated.
+        pending: {
+            if (task != .link_func) break :pending;
+            const status_ptr = &task.link_func.mir.status;
+            // First check without the mutex for the common case where MIR is ready. `.acquire` pairs with codegen's `.release` store of `.ready`, so the MIR value is visible.
+            if (status_ptr.load(.acquire) != .pending) break :pending;
+            q.mutex.lock();
+            defer q.mutex.unlock();
+            if (status_ptr.load(.acquire) != .pending) break :pending; // re-check under `mutex` so a concurrent `mirReady` cannot be missed
+            // We will stop for now, and get restarted once this MIR is ready.
+            q.state = .{ .wait_for_mir = task.link_func.mir };
+            return;
+        }
+        link.doZcuTask(comp, tid, task);
+        task.deinit(comp.zcu.?);
+        q.wip_zcu_idx += 1;
+    }
+}
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const Compilation = @import("../Compilation.zig");
+const link = @import("../link.zig");
+const PrelinkTask = link.PrelinkTask;
+const ZcuTask = link.ZcuTask;
+const Queue = @This();
-- 
cgit v1.2.3


From 5ab307cf47b1f0418d9ed4ab56df6fb798305c20 Mon Sep 17 00:00:00 2001
From: mlugg <mlugg@mlugg.co.uk>
Date: Sun, 1 Jun 2025 22:57:59 +0100
Subject: compiler: get most backends compiling again

As of this commit, every backend other than self-hosted Wasm and
self-hosted SPIR-V compiles and (at least somewhat) functions again.
Those two backends are currently disabled with panics.

Note that `Zcu.Feature.separate_thread` is *not* enabled for the fixed
backends. Avoiding linker references from codegen is a non-trivial task,
and can be done after this branch.
---
 src/Compilation.zig          |   8 ++--
 src/Zcu/PerThread.zig        |  28 +++++++++---
 src/arch/aarch64/CodeGen.zig |  46 +++++++------------
 src/arch/aarch64/Mir.zig     |  43 ++++++++++++++++++
 src/arch/arm/CodeGen.zig     |  48 +++++++-------------
 src/arch/arm/Mir.zig         |  43 ++++++++++++++++++
 src/arch/powerpc/CodeGen.zig |  10 ++---
 src/arch/riscv64/CodeGen.zig |  51 ++++++---------------
 src/arch/riscv64/Mir.zig     |  50 +++++++++++++++++++++
 src/arch/sparc64/CodeGen.zig |  45 ++++++-------------
 src/arch/sparc64/Mir.zig     |  39 +++++++++++++++-
 src/arch/x86_64/CodeGen.zig  | 104 ++++++++++++++-----------------------------
 src/arch/x86_64/Mir.zig      |  65 +++++++++++++++++++++++++++
 src/codegen.zig              |   2 +-
 src/libs/freebsd.zig         |   2 +-
 src/libs/glibc.zig           |   2 +-
 src/libs/netbsd.zig          |   2 +-
 src/link.zig                 |   2 +
 src/link/Coff.zig            |  12 ++---
 src/link/Goff.zig            |   9 ++--
 src/link/MachO.zig           |   6 +--
 src/link/MachO/ZigObject.zig |  12 ++---
 src/link/Plan9.zig           |  12 ++---
 src/link/Queue.zig           |   3 +-
 src/link/Xcoff.zig           |   9 ++--
 25 files changed, 402 insertions(+), 251 deletions(-)

(limited to 'src/link/Queue.zig')

diff --git a/src/Compilation.zig b/src/Compilation.zig
index 64ec1ab0a8..e967935539 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -4550,8 +4550,6 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void {
                 air.deinit(gpa);
                 return;
             }
-            const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid));
-            defer pt.deactivate();
             const shared_mir = try gpa.create(link.ZcuTask.LinkFunc.SharedMir);
             shared_mir.* = .{
                 .status = .init(.pending),
@@ -4567,7 +4565,11 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void {
                 } });
             } else {
                 const emit_needs_air = !zcu.backendSupportsFeature(.separate_thread);
-                pt.runCodegen(func.func, &air, shared_mir);
+                {
+                    const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid));
+                    defer pt.deactivate();
+                    pt.runCodegen(func.func, &air, shared_mir);
+                }
                 assert(shared_mir.status.load(.monotonic) != .pending);
                 comp.dispatchZcuLinkTask(tid, .{ .link_func = .{
                     .func = func.func,
diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig
index 92f1adbf2a..6475649a68 100644
--- a/src/Zcu/PerThread.zig
+++ b/src/Zcu/PerThread.zig
@@ -4376,26 +4376,40 @@ pub fn addDependency(pt: Zcu.PerThread, unit: AnalUnit, dependee: InternPool.Dep
 /// other code. This function is currently run either on the main thread, or on a separate
 /// codegen thread, depending on whether the backend supports `Zcu.Feature.separate_thread`.
 pub fn runCodegen(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air, out: *@import("../link.zig").ZcuTask.LinkFunc.SharedMir) void {
+    const zcu = pt.zcu;
     if (runCodegenInner(pt, func_index, air)) |mir| {
         out.value = mir;
         out.status.store(.ready, .release);
     } else |err| switch (err) {
         error.OutOfMemory => {
-            pt.zcu.comp.setAllocFailure();
+            zcu.comp.setAllocFailure();
             out.status.store(.failed, .monotonic);
         },
         error.CodegenFail => {
-            pt.zcu.assertCodegenFailed(pt.zcu.funcInfo(func_index).owner_nav);
+            zcu.assertCodegenFailed(zcu.funcInfo(func_index).owner_nav);
             out.status.store(.failed, .monotonic);
         },
         error.NoLinkFile => {
-            assert(pt.zcu.comp.bin_file == null);
+            assert(zcu.comp.bin_file == null);
+            out.status.store(.failed, .monotonic);
+        },
+        error.BackendDoesNotProduceMir => {
+            const backend = target_util.zigBackend(zcu.root_mod.resolved_target.result, zcu.comp.config.use_llvm);
+            switch (backend) {
+                else => unreachable, // assertion failure
+                .stage2_llvm => {},
+            }
             out.status.store(.failed, .monotonic);
         },
     }
-    pt.zcu.comp.link_task_queue.mirReady(pt.zcu.comp, out);
+    zcu.comp.link_task_queue.mirReady(zcu.comp, out);
 }
-fn runCodegenInner(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air) error{ OutOfMemory, CodegenFail, NoLinkFile }!codegen.AnyMir {
+fn runCodegenInner(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air) error{
+    OutOfMemory,
+    CodegenFail,
+    NoLinkFile,
+    BackendDoesNotProduceMir,
+}!codegen.AnyMir {
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
     const ip = &zcu.intern_pool;
@@ -4441,7 +4455,9 @@ fn runCodegenInner(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air) e
     // "emit" step because LLVM does not support incremental linking. Our linker (LLD or self-hosted)
     // will just see the ZCU object file which LLVM ultimately emits.
     if (zcu.llvm_object) |llvm_object| {
-        return llvm_object.updateFunc(pt, func_index, air, &liveness);
+        assert(pt.tid == .main); // LLVM has a lot of shared state
+        try llvm_object.updateFunc(pt, func_index, air, &liveness);
+        return error.BackendDoesNotProduceMir;
     }
 
     const lf = comp.bin_file orelse return error.NoLinkFile;
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig
index 00cceb0c67..0c29fd96e2 100644
--- a/src/arch/aarch64/CodeGen.zig
+++ b/src/arch/aarch64/CodeGen.zig
@@ -49,7 +49,6 @@ pt: Zcu.PerThread,
 air: Air,
 liveness: Air.Liveness,
 bin_file: *link.File,
-debug_output: link.File.DebugInfoOutput,
 target: *const std.Target,
 func_index: InternPool.Index,
 owner_nav: InternPool.Nav.Index,
@@ -185,6 +184,9 @@ const DbgInfoReloc = struct {
     }
 
     fn genArgDbgInfo(reloc: DbgInfoReloc, function: Self) !void {
+        // TODO: Add a pseudo-instruction or something to defer this work until Emit.
+        //       We aren't allowed to interact with linker state here.
+        if (true) return;
         switch (function.debug_output) {
             .dwarf => |dw| {
                 const loc: link.File.Dwarf.Loc = switch (reloc.mcv) {
@@ -213,6 +215,9 @@ const DbgInfoReloc = struct {
     }
 
     fn genVarDbgInfo(reloc: DbgInfoReloc, function: Self) !void {
+        // TODO: Add a pseudo-instruction or something to defer this work until Emit.
+        //       We aren't allowed to interact with linker state here.
+        if (true) return;
         switch (function.debug_output) {
             .dwarf => |dwarf| {
                 const loc: link.File.Dwarf.Loc = switch (reloc.mcv) {
@@ -326,11 +331,9 @@ pub fn generate(
     pt: Zcu.PerThread,
     src_loc: Zcu.LazySrcLoc,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
-    code: *std.ArrayListUnmanaged(u8),
-    debug_output: link.File.DebugInfoOutput,
-) CodeGenError!void {
+    air: *const Air,
+    liveness: *const Air.Liveness,
+) CodeGenError!Mir {
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
     const func = zcu.funcInfo(func_index);
@@ -349,9 +352,8 @@ pub fn generate(
     var function: Self = .{
         .gpa = gpa,
         .pt = pt,
-        .air = air,
-        .liveness = liveness,
-        .debug_output = debug_output,
+        .air = air.*,
+        .liveness = liveness.*,
         .target = target,
         .bin_file = lf,
         .func_index = func_index,
@@ -395,29 +397,13 @@ pub fn generate(
 
     var mir: Mir = .{
         .instructions = function.mir_instructions.toOwnedSlice(),
-        .extra = try function.mir_extra.toOwnedSlice(gpa),
-    };
-    defer mir.deinit(gpa);
-
-    var emit: Emit = .{
-        .mir = mir,
-        .bin_file = lf,
-        .debug_output = debug_output,
-        .target = target,
-        .src_loc = src_loc,
-        .code = code,
-        .prev_di_pc = 0,
-        .prev_di_line = func.lbrace_line,
-        .prev_di_column = func.lbrace_column,
-        .stack_size = function.max_end_stack,
+        .extra = &.{}, // fallible, so assign after errdefer
+        .max_end_stack = function.max_end_stack,
         .saved_regs_stack_space = function.saved_regs_stack_space,
     };
-    defer emit.deinit();
-
-    emit.emitMir() catch |err| switch (err) {
-        error.EmitFail => return function.failMsg(emit.err_msg.?),
-        else => |e| return e,
-    };
+    errdefer mir.deinit(gpa);
+    mir.extra = try function.mir_extra.toOwnedSlice(gpa);
+    return mir;
 }
 
 fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig
index edf05f625e..34fcc64c7e 100644
--- a/src/arch/aarch64/Mir.zig
+++ b/src/arch/aarch64/Mir.zig
@@ -13,6 +13,14 @@ const assert = std.debug.assert;
 
 const bits = @import("bits.zig");
 const Register = bits.Register;
+const InternPool = @import("../../InternPool.zig");
+const Emit = @import("Emit.zig");
+const codegen = @import("../../codegen.zig");
+const link = @import("../../link.zig");
+const Zcu = @import("../../Zcu.zig");
+
+max_end_stack: u32,
+saved_regs_stack_space: u32,
 
 instructions: std.MultiArrayList(Inst).Slice,
 /// The meaning of this data is determined by `Inst.Tag` value.
@@ -498,6 +506,41 @@ pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void {
     mir.* = undefined;
 }
 
+pub fn emit(
+    mir: Mir,
+    lf: *link.File,
+    pt: Zcu.PerThread,
+    src_loc: Zcu.LazySrcLoc,
+    func_index: InternPool.Index,
+    code: *std.ArrayListUnmanaged(u8),
+    debug_output: link.File.DebugInfoOutput,
+    air: *const @import("../../Air.zig"),
+) codegen.CodeGenError!void {
+    _ = air; // using this would be a bug
+    const zcu = pt.zcu;
+    const func = zcu.funcInfo(func_index);
+    const nav = func.owner_nav;
+    const mod = zcu.navFileScope(nav).mod.?;
+    var e: Emit = .{
+        .mir = mir,
+        .bin_file = lf,
+        .debug_output = debug_output,
+        .target = &mod.resolved_target.result, // target of the module owning the file scope of the function's owner nav
+        .src_loc = src_loc,
+        .code = code,
+        .prev_di_pc = 0,
+        .prev_di_line = func.lbrace_line,
+        .prev_di_column = func.lbrace_column,
+        .stack_size = mir.max_end_stack, // stack layout values are read from the MIR itself
+        .saved_regs_stack_space = mir.saved_regs_stack_space,
+    };
+    defer e.deinit();
+    e.emitMir() catch |err| switch (err) {
+        error.EmitFail => return zcu.codegenFailMsg(nav, e.err_msg.?), // pass the emitter's message to `codegenFailMsg` for `nav`
+        else => |e1| return e1,
+    };
+}
+
 /// Returns the requested data, as well as the new index which is at the start of the
 /// trailers for the object.
 pub fn extraData(mir: Mir, comptime T: type, index: usize) struct { data: T, end: usize } {
diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig
index 421ba7d753..3868011557 100644
--- a/src/arch/arm/CodeGen.zig
+++ b/src/arch/arm/CodeGen.zig
@@ -50,7 +50,6 @@ pt: Zcu.PerThread,
 air: Air,
 liveness: Air.Liveness,
 bin_file: *link.File,
-debug_output: link.File.DebugInfoOutput,
 target: *const std.Target,
 func_index: InternPool.Index,
 err_msg: ?*ErrorMsg,
@@ -264,6 +263,9 @@ const DbgInfoReloc = struct {
     }
 
     fn genArgDbgInfo(reloc: DbgInfoReloc, function: Self) !void {
+        // TODO: Add a pseudo-instruction or something to defer this work until Emit.
+        //       We aren't allowed to interact with linker state here.
+        if (true) return;
         switch (function.debug_output) {
             .dwarf => |dw| {
                 const loc: link.File.Dwarf.Loc = switch (reloc.mcv) {
@@ -292,6 +294,9 @@ const DbgInfoReloc = struct {
     }
 
     fn genVarDbgInfo(reloc: DbgInfoReloc, function: Self) !void {
+        // TODO: Add a pseudo-instruction or something to defer this work until Emit.
+        //       We aren't allowed to interact with linker state here.
+        if (true) return;
         switch (function.debug_output) {
             .dwarf => |dw| {
                 const loc: link.File.Dwarf.Loc = switch (reloc.mcv) {
@@ -335,11 +340,9 @@ pub fn generate(
     pt: Zcu.PerThread,
     src_loc: Zcu.LazySrcLoc,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
-    code: *std.ArrayListUnmanaged(u8),
-    debug_output: link.File.DebugInfoOutput,
-) CodeGenError!void {
+    air: *const Air,
+    liveness: *const Air.Liveness,
+) CodeGenError!Mir {
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
     const func = zcu.funcInfo(func_index);
@@ -358,11 +361,10 @@ pub fn generate(
     var function: Self = .{
         .gpa = gpa,
         .pt = pt,
-        .air = air,
-        .liveness = liveness,
+        .air = air.*,
+        .liveness = liveness.*,
         .target = target,
         .bin_file = lf,
-        .debug_output = debug_output,
         .func_index = func_index,
         .err_msg = null,
         .args = undefined, // populated after `resolveCallingConventionValues`
@@ -402,31 +404,15 @@ pub fn generate(
             return function.fail("failed to generate debug info: {s}", .{@errorName(err)});
     }
 
-    var mir = Mir{
+    var mir: Mir = .{
         .instructions = function.mir_instructions.toOwnedSlice(),
-        .extra = try function.mir_extra.toOwnedSlice(gpa),
-    };
-    defer mir.deinit(gpa);
-
-    var emit = Emit{
-        .mir = mir,
-        .bin_file = lf,
-        .debug_output = debug_output,
-        .target = target,
-        .src_loc = src_loc,
-        .code = code,
-        .prev_di_pc = 0,
-        .prev_di_line = func.lbrace_line,
-        .prev_di_column = func.lbrace_column,
-        .stack_size = function.max_end_stack,
+        .extra = &.{}, // fallible, so assign after errdefer
+        .max_end_stack = function.max_end_stack,
         .saved_regs_stack_space = function.saved_regs_stack_space,
     };
-    defer emit.deinit();
-
-    emit.emitMir() catch |err| switch (err) {
-        error.EmitFail => return function.failMsg(emit.err_msg.?),
-        else => |e| return e,
-    };
+    errdefer mir.deinit(gpa);
+    mir.extra = try function.mir_extra.toOwnedSlice(gpa);
+    return mir;
 }
 
 fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
diff --git a/src/arch/arm/Mir.zig b/src/arch/arm/Mir.zig
index 5e651b7939..0366663eae 100644
--- a/src/arch/arm/Mir.zig
+++ b/src/arch/arm/Mir.zig
@@ -13,6 +13,14 @@ const assert = std.debug.assert;
 
 const bits = @import("bits.zig");
 const Register = bits.Register;
+const InternPool = @import("../../InternPool.zig");
+const Emit = @import("Emit.zig");
+const codegen = @import("../../codegen.zig");
+const link = @import("../../link.zig");
+const Zcu = @import("../../Zcu.zig");
+
+max_end_stack: u32,
+saved_regs_stack_space: u32,
 
 instructions: std.MultiArrayList(Inst).Slice,
 /// The meaning of this data is determined by `Inst.Tag` value.
@@ -278,6 +286,41 @@ pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void {
     mir.* = undefined;
 }
 
+pub fn emit(
+    mir: Mir,
+    lf: *link.File,
+    pt: Zcu.PerThread,
+    src_loc: Zcu.LazySrcLoc,
+    func_index: InternPool.Index,
+    code: *std.ArrayListUnmanaged(u8),
+    debug_output: link.File.DebugInfoOutput,
+    air: *const @import("../../Air.zig"),
+) codegen.CodeGenError!void {
+    _ = air; // using this would be a bug
+    const zcu = pt.zcu;
+    const func = zcu.funcInfo(func_index);
+    const nav = func.owner_nav;
+    const mod = zcu.navFileScope(nav).mod.?;
+    var e: Emit = .{
+        .mir = mir,
+        .bin_file = lf,
+        .debug_output = debug_output,
+        .target = &mod.resolved_target.result, // target of the module owning the file scope of the function's owner nav
+        .src_loc = src_loc,
+        .code = code,
+        .prev_di_pc = 0,
+        .prev_di_line = func.lbrace_line,
+        .prev_di_column = func.lbrace_column,
+        .stack_size = mir.max_end_stack, // stack layout values are read from the MIR itself
+        .saved_regs_stack_space = mir.saved_regs_stack_space,
+    };
+    defer e.deinit();
+    e.emitMir() catch |err| switch (err) {
+        error.EmitFail => return zcu.codegenFailMsg(nav, e.err_msg.?), // pass the emitter's message to `codegenFailMsg` for `nav`
+        else => |e1| return e1,
+    };
+}
+
 /// Returns the requested data, as well as the new index which is at the start of the
 /// trailers for the object.
 pub fn extraData(mir: Mir, comptime T: type, index: usize) struct { data: T, end: usize } {
diff --git a/src/arch/powerpc/CodeGen.zig b/src/arch/powerpc/CodeGen.zig
index 0cfee67ebd..4964fe19f4 100644
--- a/src/arch/powerpc/CodeGen.zig
+++ b/src/arch/powerpc/CodeGen.zig
@@ -19,19 +19,15 @@ pub fn generate(
     pt: Zcu.PerThread,
     src_loc: Zcu.LazySrcLoc,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
-    code: *std.ArrayListUnmanaged(u8),
-    debug_output: link.File.DebugInfoOutput,
-) codegen.CodeGenError!void {
+    air: *const Air,
+    liveness: *const Air.Liveness,
+) codegen.CodeGenError!noreturn {
     _ = bin_file;
     _ = pt;
     _ = src_loc;
     _ = func_index;
     _ = air;
     _ = liveness;
-    _ = code;
-    _ = debug_output;
 
     unreachable;
 }
diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig
index 9fc51bd2d3..9b5e0ed69b 100644
--- a/src/arch/riscv64/CodeGen.zig
+++ b/src/arch/riscv64/CodeGen.zig
@@ -68,7 +68,6 @@ gpa: Allocator,
 
 mod: *Package.Module,
 target: *const std.Target,
-debug_output: link.File.DebugInfoOutput,
 args: []MCValue,
 ret_mcv: InstTracking,
 fn_type: Type,
@@ -746,13 +745,10 @@ pub fn generate(
     pt: Zcu.PerThread,
     src_loc: Zcu.LazySrcLoc,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
-    code: *std.ArrayListUnmanaged(u8),
-    debug_output: link.File.DebugInfoOutput,
-) CodeGenError!void {
+    air: *const Air,
+    liveness: *const Air.Liveness,
+) CodeGenError!Mir {
     const zcu = pt.zcu;
-    const comp = zcu.comp;
     const gpa = zcu.gpa;
     const ip = &zcu.intern_pool;
     const func = zcu.funcInfo(func_index);
@@ -769,13 +765,12 @@ pub fn generate(
 
     var function: Func = .{
         .gpa = gpa,
-        .air = air,
+        .air = air.*,
         .pt = pt,
         .mod = mod,
         .bin_file = bin_file,
-        .liveness = liveness,
+        .liveness = liveness.*,
         .target = &mod.resolved_target.result,
-        .debug_output = debug_output,
         .owner = .{ .nav_index = func.owner_nav },
         .args = undefined, // populated after `resolveCallingConventionValues`
         .ret_mcv = undefined, // populated after `resolveCallingConventionValues`
@@ -855,33 +850,8 @@ pub fn generate(
         .instructions = function.mir_instructions.toOwnedSlice(),
         .frame_locs = function.frame_locs.toOwnedSlice(),
     };
-    defer mir.deinit(gpa);
-
-    var emit: Emit = .{
-        .lower = .{
-            .pt = pt,
-            .allocator = gpa,
-            .mir = mir,
-            .cc = fn_info.cc,
-            .src_loc = src_loc,
-            .output_mode = comp.config.output_mode,
-            .link_mode = comp.config.link_mode,
-            .pic = mod.pic,
-        },
-        .bin_file = bin_file,
-        .debug_output = debug_output,
-        .code = code,
-        .prev_di_pc = 0,
-        .prev_di_line = func.lbrace_line,
-        .prev_di_column = func.lbrace_column,
-    };
-    defer emit.deinit();
-
-    emit.emitMir() catch |err| switch (err) {
-        error.LowerFail, error.EmitFail => return function.failMsg(emit.lower.err_msg.?),
-        error.InvalidInstruction => |e| return function.fail("emit MIR failed: {s} (Zig compiler bug)", .{@errorName(e)}),
-        else => |e| return e,
-    };
+    errdefer mir.deinit(gpa);
+    return mir;
 }
 
 pub fn generateLazy(
@@ -904,7 +874,6 @@ pub fn generateLazy(
         .bin_file = bin_file,
         .liveness = undefined,
         .target = &mod.resolved_target.result,
-        .debug_output = debug_output,
         .owner = .{ .lazy_sym = lazy_sym },
         .args = undefined, // populated after `resolveCallingConventionValues`
         .ret_mcv = undefined, // populated after `resolveCallingConventionValues`
@@ -4760,6 +4729,9 @@ fn genArgDbgInfo(func: *const Func, inst: Air.Inst.Index, mcv: MCValue) InnerErr
     const ty = arg.ty.toType();
     if (arg.name == .none) return;
 
+    // TODO: Add a pseudo-instruction or something to defer this work until Emit.
+    //       We aren't allowed to interact with linker state here.
+    if (true) return;
     switch (func.debug_output) {
         .dwarf => |dw| switch (mcv) {
             .register => |reg| dw.genLocalDebugInfo(
@@ -5273,6 +5245,9 @@ fn genVarDbgInfo(
     mcv: MCValue,
     name: []const u8,
 ) !void {
+    // TODO: Add a pseudo-instruction or something to defer this work until Emit.
+    //       We aren't allowed to interact with linker state here.
+    if (true) return;
     switch (func.debug_output) {
         .dwarf => |dwarf| {
             const loc: link.File.Dwarf.Loc = switch (mcv) {
diff --git a/src/arch/riscv64/Mir.zig b/src/arch/riscv64/Mir.zig
index 2ae62fd9b2..eef3fe7511 100644
--- a/src/arch/riscv64/Mir.zig
+++ b/src/arch/riscv64/Mir.zig
@@ -109,6 +109,50 @@ pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void {
     mir.* = undefined;
 }
 
+pub fn emit( // riscv64: runs `Emit.emitMir` over already-generated `mir`
+    mir: Mir,
+    lf: *link.File,
+    pt: Zcu.PerThread,
+    src_loc: Zcu.LazySrcLoc,
+    func_index: InternPool.Index,
+    code: *std.ArrayListUnmanaged(u8),
+    debug_output: link.File.DebugInfoOutput,
+    air: *const @import("../../Air.zig"),
+) codegen.CodeGenError!void {
+    _ = air; // deliberately ignored: using this would be a bug (emit must work from `mir` alone)
+    const zcu = pt.zcu;
+    const comp = zcu.comp;
+    const gpa = comp.gpa;
+    const func = zcu.funcInfo(func_index);
+    const fn_info = zcu.typeToFunc(.fromInterned(func.ty)).?;
+    const nav = func.owner_nav;
+    const mod = zcu.navFileScope(nav).mod.?;
+    var e: Emit = .{ // emitter state for this one function
+        .lower = .{
+            .pt = pt,
+            .allocator = gpa,
+            .mir = mir,
+            .cc = fn_info.cc,
+            .src_loc = src_loc,
+            .output_mode = comp.config.output_mode,
+            .link_mode = comp.config.link_mode,
+            .pic = mod.pic,
+        },
+        .bin_file = lf,
+        .debug_output = debug_output,
+        .code = code,
+        .prev_di_pc = 0,
+        .prev_di_line = func.lbrace_line, // debug-info position is seeded from the function's `lbrace_*` fields
+        .prev_di_column = func.lbrace_column,
+    };
+    defer e.deinit();
+    e.emitMir() catch |err| switch (err) { // map emit errors onto codegen failures reported against `nav`
+        error.LowerFail, error.EmitFail => return zcu.codegenFailMsg(nav, e.lower.err_msg.?),
+        error.InvalidInstruction => return zcu.codegenFail(nav, "emit MIR failed: {s} (Zig compiler bug)", .{@errorName(err)}),
+        else => |err1| return err1, // every other error propagates unchanged
+    };
+}
+
 pub const FrameLoc = struct {
     base: Register,
     disp: i32,
@@ -202,3 +246,9 @@ const FrameIndex = bits.FrameIndex;
 const FrameAddr = @import("CodeGen.zig").FrameAddr;
 const IntegerBitSet = std.bit_set.IntegerBitSet;
 const Mnemonic = @import("mnem.zig").Mnemonic;
+
+const InternPool = @import("../../InternPool.zig");
+const Emit = @import("Emit.zig");
+const codegen = @import("../../codegen.zig");
+const link = @import("../../link.zig");
+const Zcu = @import("../../Zcu.zig");
diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig
index ad9884dcdb..180aaedd3c 100644
--- a/src/arch/sparc64/CodeGen.zig
+++ b/src/arch/sparc64/CodeGen.zig
@@ -57,8 +57,6 @@ liveness: Air.Liveness,
 bin_file: *link.File,
 target: *const std.Target,
 func_index: InternPool.Index,
-code: *std.ArrayListUnmanaged(u8),
-debug_output: link.File.DebugInfoOutput,
 err_msg: ?*ErrorMsg,
 args: []MCValue,
 ret_mcv: MCValue,
@@ -268,11 +266,9 @@ pub fn generate(
     pt: Zcu.PerThread,
     src_loc: Zcu.LazySrcLoc,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
-    code: *std.ArrayListUnmanaged(u8),
-    debug_output: link.File.DebugInfoOutput,
-) CodeGenError!void {
+    air: *const Air,
+    liveness: *const Air.Liveness,
+) CodeGenError!Mir {
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
     const func = zcu.funcInfo(func_index);
@@ -291,13 +287,11 @@ pub fn generate(
     var function: Self = .{
         .gpa = gpa,
         .pt = pt,
-        .air = air,
-        .liveness = liveness,
+        .air = air.*,
+        .liveness = liveness.*,
         .target = target,
         .bin_file = lf,
         .func_index = func_index,
-        .code = code,
-        .debug_output = debug_output,
         .err_msg = null,
         .args = undefined, // populated after `resolveCallingConventionValues`
         .ret_mcv = undefined, // populated after `resolveCallingConventionValues`
@@ -330,29 +324,13 @@ pub fn generate(
         else => |e| return e,
     };
 
-    var mir = Mir{
+    var mir: Mir = .{
         .instructions = function.mir_instructions.toOwnedSlice(),
-        .extra = try function.mir_extra.toOwnedSlice(gpa),
-    };
-    defer mir.deinit(gpa);
-
-    var emit: Emit = .{
-        .mir = mir,
-        .bin_file = lf,
-        .debug_output = debug_output,
-        .target = target,
-        .src_loc = src_loc,
-        .code = code,
-        .prev_di_pc = 0,
-        .prev_di_line = func.lbrace_line,
-        .prev_di_column = func.lbrace_column,
-    };
-    defer emit.deinit();
-
-    emit.emitMir() catch |err| switch (err) {
-        error.EmitFail => return function.failMsg(emit.err_msg.?),
-        else => |e| return e,
+        .extra = &.{}, // fallible, so populated after errdefer
     };
+    errdefer mir.deinit(gpa);
+    mir.extra = try function.mir_extra.toOwnedSlice(gpa);
+    return mir;
 }
 
 fn gen(self: *Self) !void {
@@ -3566,6 +3544,9 @@ fn genArgDbgInfo(self: Self, inst: Air.Inst.Index, mcv: MCValue) !void {
     const ty = arg.ty.toType();
     if (arg.name == .none) return;
 
+    // TODO: Add a pseudo-instruction or something to defer this work until Emit.
+    //       We aren't allowed to interact with linker state here.
+    if (true) return;
     switch (self.debug_output) {
         .dwarf => |dw| switch (mcv) {
             .register => |reg| try dw.genLocalDebugInfo(
diff --git a/src/arch/sparc64/Mir.zig b/src/arch/sparc64/Mir.zig
index e9086db7a5..26c5c3267b 100644
--- a/src/arch/sparc64/Mir.zig
+++ b/src/arch/sparc64/Mir.zig
@@ -12,7 +12,11 @@ const assert = std.debug.assert;
 
 const Mir = @This();
 const bits = @import("bits.zig");
-const Air = @import("../../Air.zig");
+const InternPool = @import("../../InternPool.zig");
+const Emit = @import("Emit.zig");
+const codegen = @import("../../codegen.zig");
+const link = @import("../../link.zig");
+const Zcu = @import("../../Zcu.zig");
 
 const Instruction = bits.Instruction;
 const ASI = bits.Instruction.ASI;
@@ -370,6 +374,39 @@ pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void {
     mir.* = undefined;
 }
 
+pub fn emit( // sparc64: runs `Emit.emitMir` over already-generated `mir`
+    mir: Mir,
+    lf: *link.File,
+    pt: Zcu.PerThread,
+    src_loc: Zcu.LazySrcLoc,
+    func_index: InternPool.Index,
+    code: *std.ArrayListUnmanaged(u8),
+    debug_output: link.File.DebugInfoOutput,
+    air: *const @import("../../Air.zig"),
+) codegen.CodeGenError!void {
+    _ = air; // deliberately ignored: using this would be a bug (emit must work from `mir` alone)
+    const zcu = pt.zcu;
+    const func = zcu.funcInfo(func_index);
+    const nav = func.owner_nav;
+    const mod = zcu.navFileScope(nav).mod.?;
+    var e: Emit = .{ // emitter state for this one function
+        .mir = mir,
+        .bin_file = lf,
+        .debug_output = debug_output,
+        .target = &mod.resolved_target.result, // target is looked up from the module owning `nav`
+        .src_loc = src_loc,
+        .code = code,
+        .prev_di_pc = 0,
+        .prev_di_line = func.lbrace_line, // debug-info position is seeded from the function's `lbrace_*` fields
+        .prev_di_column = func.lbrace_column,
+    };
+    defer e.deinit();
+    e.emitMir() catch |err| switch (err) { // map emit errors onto codegen failures reported against `nav`
+        error.EmitFail => return zcu.codegenFailMsg(nav, e.err_msg.?),
+        else => |err1| return err1, // every other error propagates unchanged
+    };
+}
+
 /// Returns the requested data, as well as the new index which is at the start of the
 /// trailers for the object.
 pub fn extraData(mir: Mir, comptime T: type, index: usize) struct { data: T, end: usize } {
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index b38492d500..1d95c8db77 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -125,7 +125,6 @@ pt: Zcu.PerThread,
 air: Air,
 liveness: Air.Liveness,
 bin_file: *link.File,
-debug_output: link.File.DebugInfoOutput,
 target: *const std.Target,
 owner: Owner,
 inline_func: InternPool.Index,
@@ -972,13 +971,10 @@ pub fn generate(
     pt: Zcu.PerThread,
     src_loc: Zcu.LazySrcLoc,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
-    code: *std.ArrayListUnmanaged(u8),
-    debug_output: link.File.DebugInfoOutput,
-) codegen.CodeGenError!void {
+    air: *const Air,
+    liveness: *const Air.Liveness,
+) codegen.CodeGenError!Mir {
     const zcu = pt.zcu;
-    const comp = zcu.comp;
     const gpa = zcu.gpa;
     const ip = &zcu.intern_pool;
     const func = zcu.funcInfo(func_index);
@@ -988,12 +984,11 @@ pub fn generate(
     var function: CodeGen = .{
         .gpa = gpa,
         .pt = pt,
-        .air = air,
-        .liveness = liveness,
+        .air = air.*,
+        .liveness = liveness.*,
         .target = &mod.resolved_target.result,
         .mod = mod,
         .bin_file = bin_file,
-        .debug_output = debug_output,
         .owner = .{ .nav_index = func.owner_nav },
         .inline_func = func_index,
         .arg_index = undefined,
@@ -1090,7 +1085,7 @@ pub fn generate(
     };
 
     // Drop them off at the rbrace.
-    if (debug_output != .none) _ = try function.addInst(.{
+    if (!mod.strip) _ = try function.addInst(.{
         .tag = .pseudo,
         .ops = .pseudo_dbg_line_line_column,
         .data = .{ .line_column = .{
@@ -1100,49 +1095,17 @@ pub fn generate(
     });
 
     var mir: Mir = .{
-        .instructions = function.mir_instructions.toOwnedSlice(),
-        .extra = try function.mir_extra.toOwnedSlice(gpa),
-        .table = try function.mir_table.toOwnedSlice(gpa),
-        .frame_locs = function.frame_locs.toOwnedSlice(),
-    };
-    defer mir.deinit(gpa);
-
-    var emit: Emit = .{
-        .air = function.air,
-        .lower = .{
-            .bin_file = bin_file,
-            .target = function.target,
-            .allocator = gpa,
-            .mir = mir,
-            .cc = fn_info.cc,
-            .src_loc = src_loc,
-            .output_mode = comp.config.output_mode,
-            .link_mode = comp.config.link_mode,
-            .pic = mod.pic,
-        },
-        .atom_index = function.owner.getSymbolIndex(&function) catch |err| switch (err) {
-            error.CodegenFail => return error.CodegenFail,
-            else => |e| return e,
-        },
-        .debug_output = debug_output,
-        .code = code,
-        .prev_di_loc = .{
-            .line = func.lbrace_line,
-            .column = func.lbrace_column,
-            .is_stmt = switch (debug_output) {
-                .dwarf => |dwarf| dwarf.dwarf.debug_line.header.default_is_stmt,
-                .plan9 => undefined,
-                .none => undefined,
-            },
-        },
-        .prev_di_pc = 0,
-    };
-    emit.emitMir() catch |err| switch (err) {
-        error.LowerFail, error.EmitFail => return function.failMsg(emit.lower.err_msg.?),
-
-        error.InvalidInstruction, error.CannotEncode => |e| return function.fail("emit MIR failed: {s} (Zig compiler bug)", .{@errorName(e)}),
-        else => |e| return function.fail("emit MIR failed: {s}", .{@errorName(e)}),
+        .instructions = .empty,
+        .extra = &.{},
+        .table = &.{},
+        .frame_locs = .empty,
     };
+    errdefer mir.deinit(gpa);
+    mir.instructions = function.mir_instructions.toOwnedSlice();
+    mir.extra = try function.mir_extra.toOwnedSlice(gpa);
+    mir.table = try function.mir_table.toOwnedSlice(gpa);
+    mir.frame_locs = function.frame_locs.toOwnedSlice();
+    return mir;
 }
 
 pub fn generateLazy(
@@ -1165,7 +1128,6 @@ pub fn generateLazy(
         .target = &mod.resolved_target.result,
         .mod = mod,
         .bin_file = bin_file,
-        .debug_output = debug_output,
         .owner = .{ .lazy_sym = lazy_sym },
         .inline_func = undefined,
         .arg_index = undefined,
@@ -2339,7 +2301,7 @@ fn gen(self: *CodeGen) InnerError!void {
             else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}),
         };
 
-        if (self.debug_output != .none) try self.asmPseudo(.pseudo_dbg_prologue_end_none);
+        if (!self.mod.strip) try self.asmPseudo(.pseudo_dbg_prologue_end_none);
 
         try self.genBody(self.air.getMainBody());
 
@@ -2356,7 +2318,7 @@ fn gen(self: *CodeGen) InnerError!void {
             }
             for (self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc);
 
-            if (self.debug_output != .none) try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
+            if (!self.mod.strip) try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
             const backpatch_stack_dealloc = try self.asmPlaceholder();
             const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
             try self.asmRegister(.{ ._, .pop }, .rbp);
@@ -2475,9 +2437,9 @@ fn gen(self: *CodeGen) InnerError!void {
             });
         }
     } else {
-        if (self.debug_output != .none) try self.asmPseudo(.pseudo_dbg_prologue_end_none);
+        if (!self.mod.strip) try self.asmPseudo(.pseudo_dbg_prologue_end_none);
         try self.genBody(self.air.getMainBody());
-        if (self.debug_output != .none) try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
+        if (!self.mod.strip) try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
     }
 }
 
@@ -2498,9 +2460,9 @@ fn checkInvariantsAfterAirInst(self: *CodeGen) void {
 }
 
 fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
-    if (self.debug_output != .none) try self.asmPseudo(.pseudo_dbg_enter_block_none);
+    if (!self.mod.strip) try self.asmPseudo(.pseudo_dbg_enter_block_none);
     try self.genBody(body);
-    if (self.debug_output != .none) try self.asmPseudo(.pseudo_dbg_leave_block_none);
+    if (!self.mod.strip) try self.asmPseudo(.pseudo_dbg_leave_block_none);
 }
 
 fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
@@ -2544,7 +2506,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .shuffle_one, .shuffle_two => @panic("x86_64 TODO: shuffle_one/shuffle_two"),
             // zig fmt: on
 
-            .arg => if (cg.debug_output != .none) {
+            .arg => if (!cg.mod.strip) {
                 // skip zero-bit arguments as they don't have a corresponding arg instruction
                 var arg_index = cg.arg_index;
                 while (cg.args[arg_index] == .none) arg_index += 1;
@@ -64179,9 +64141,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .block => {
                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
                 const block = cg.air.extraData(Air.Block, ty_pl.payload);
-                if (cg.debug_output != .none) try cg.asmPseudo(.pseudo_dbg_enter_block_none);
+                if (!cg.mod.strip) try cg.asmPseudo(.pseudo_dbg_enter_block_none);
                 try cg.lowerBlock(inst, @ptrCast(cg.air.extra.items[block.end..][0..block.data.body_len]));
-                if (cg.debug_output != .none) try cg.asmPseudo(.pseudo_dbg_leave_block_none);
+                if (!cg.mod.strip) try cg.asmPseudo(.pseudo_dbg_leave_block_none);
             },
             .loop => if (use_old) try cg.airLoop(inst) else {
                 const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
@@ -85191,7 +85153,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .switch_dispatch => try cg.airSwitchDispatch(inst),
             .@"try", .try_cold => try cg.airTry(inst),
             .try_ptr, .try_ptr_cold => try cg.airTryPtr(inst),
-            .dbg_stmt => if (cg.debug_output != .none) {
+            .dbg_stmt => if (!cg.mod.strip) {
                 const dbg_stmt = air_datas[@intFromEnum(inst)].dbg_stmt;
                 _ = try cg.addInst(.{
                     .tag = .pseudo,
@@ -85202,7 +85164,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                     } },
                 });
             },
-            .dbg_empty_stmt => if (cg.debug_output != .none) {
+            .dbg_empty_stmt => if (!cg.mod.strip) {
                 if (cg.mir_instructions.len > 0) {
                     const prev_mir_op = &cg.mir_instructions.items(.ops)[cg.mir_instructions.len - 1];
                     if (prev_mir_op.* == .pseudo_dbg_line_line_column)
@@ -85216,13 +85178,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
                 const old_inline_func = cg.inline_func;
                 defer cg.inline_func = old_inline_func;
                 cg.inline_func = dbg_inline_block.data.func;
-                if (cg.debug_output != .none) _ = try cg.addInst(.{
+                if (!cg.mod.strip) _ = try cg.addInst(.{
                     .tag = .pseudo,
                     .ops = .pseudo_dbg_enter_inline_func,
                     .data = .{ .func = dbg_inline_block.data.func },
                 });
                 try cg.lowerBlock(inst, @ptrCast(cg.air.extra.items[dbg_inline_block.end..][0..dbg_inline_block.data.body_len]));
-                if (cg.debug_output != .none) _ = try cg.addInst(.{
+                if (!cg.mod.strip) _ = try cg.addInst(.{
                     .tag = .pseudo,
                     .ops = .pseudo_dbg_leave_inline_func,
                     .data = .{ .func = old_inline_func },
@@ -85231,7 +85193,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
             .dbg_var_ptr,
             .dbg_var_val,
             .dbg_arg_inline,
-            => if (use_old) try cg.airDbgVar(inst) else if (cg.debug_output != .none) {
+            => if (use_old) try cg.airDbgVar(inst) else if (!cg.mod.strip) {
                 const pl_op = air_datas[@intFromEnum(inst)].pl_op;
                 var ops = try cg.tempsFromOperands(inst, .{pl_op.operand});
                 var mcv = ops[0].tracking(cg).short;
@@ -173366,7 +173328,7 @@ fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void {
     while (self.args[arg_index] == .none) arg_index += 1;
     self.arg_index = arg_index + 1;
 
-    const result: MCValue = if (self.debug_output == .none and self.liveness.isUnused(inst)) .unreach else result: {
+    const result: MCValue = if (self.mod.strip and self.liveness.isUnused(inst)) .unreach else result: {
         const arg_ty = self.typeOfIndex(inst);
         const src_mcv = self.args[arg_index];
         switch (src_mcv) {
@@ -173468,7 +173430,7 @@ fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void {
 }
 
 fn airDbgVarArgs(self: *CodeGen) !void {
-    if (self.debug_output == .none) return;
+    if (self.mod.strip) return;
     if (!self.pt.zcu.typeToFunc(self.fn_type).?.is_var_args) return;
     try self.asmPseudo(.pseudo_dbg_var_args_none);
 }
@@ -173478,7 +173440,7 @@ fn genLocalDebugInfo(
     inst: Air.Inst.Index,
     mcv: MCValue,
 ) !void {
-    if (self.debug_output == .none) return;
+    if (self.mod.strip) return;
     switch (self.air.instructions.items(.tag)[@intFromEnum(inst)]) {
         else => unreachable,
         .arg, .dbg_arg_inline, .dbg_var_val => |tag| {
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index 8d202e6bae..14468677af 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -1929,6 +1929,67 @@ pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void {
     mir.* = undefined;
 }
 
+pub fn emit( // x86_64: runs `Emit.emitMir` over already-generated `mir`
+    mir: Mir,
+    lf: *link.File,
+    pt: Zcu.PerThread,
+    src_loc: Zcu.LazySrcLoc,
+    func_index: InternPool.Index,
+    code: *std.ArrayListUnmanaged(u8),
+    debug_output: link.File.DebugInfoOutput,
+    /// TODO: remove dependency on this argument. This blocks enabling `Zcu.Feature.separate_thread`.
+    air: *const Air,
+) codegen.CodeGenError!void {
+    const zcu = pt.zcu;
+    const comp = zcu.comp;
+    const gpa = comp.gpa;
+    const func = zcu.funcInfo(func_index);
+    const fn_info = zcu.typeToFunc(.fromInterned(func.ty)).?;
+    const nav = func.owner_nav;
+    const mod = zcu.navFileScope(nav).mod.?;
+    var e: Emit = .{ // emitter state for this one function
+        .air = air.*, // Emit still receives a copy of the AIR; see the TODO on the `air` parameter
+        .lower = .{
+            .bin_file = lf,
+            .target = &mod.resolved_target.result,
+            .allocator = gpa,
+            .mir = mir,
+            .cc = fn_info.cc,
+            .src_loc = src_loc,
+            .output_mode = comp.config.output_mode,
+            .link_mode = comp.config.link_mode,
+            .pic = mod.pic,
+        },
+        .atom_index = sym: { // look up the symbol for `nav` in whichever linker backend `lf` is
+            if (lf.cast(.elf)) |ef| break :sym try ef.zigObjectPtr().?.getOrCreateMetadataForNav(zcu, nav);
+            if (lf.cast(.macho)) |mf| break :sym try mf.getZigObject().?.getOrCreateMetadataForNav(mf, nav);
+            if (lf.cast(.coff)) |cf| {
+                const atom = try cf.getOrCreateAtomForNav(nav);
+                break :sym cf.getAtom(atom).getSymbolIndex().?;
+            }
+            if (lf.cast(.plan9)) |p9f| break :sym try p9f.seeNav(pt, nav);
+            unreachable; // reached only if `lf` is none of elf/macho/coff/plan9
+        },
+        .debug_output = debug_output,
+        .code = code,
+        .prev_di_loc = .{
+            .line = func.lbrace_line,
+            .column = func.lbrace_column,
+            .is_stmt = switch (debug_output) { // only the DWARF output supplies a default; others leave it undefined
+                .dwarf => |dwarf| dwarf.dwarf.debug_line.header.default_is_stmt,
+                .plan9 => undefined,
+                .none => undefined,
+            },
+        },
+        .prev_di_pc = 0,
+    };
+    e.emitMir() catch |err| switch (err) { // every emit error is reported via `zcu.codegenFail*` against `nav`
+        error.LowerFail, error.EmitFail => return zcu.codegenFailMsg(nav, e.lower.err_msg.?),
+        error.InvalidInstruction, error.CannotEncode => return zcu.codegenFail(nav, "emit MIR failed: {s} (Zig compiler bug)", .{@errorName(err)}),
+        else => return zcu.codegenFail(nav, "emit MIR failed: {s}", .{@errorName(err)}),
+    };
+}
+
 pub fn extraData(mir: Mir, comptime T: type, index: u32) struct { data: T, end: u32 } {
     const fields = std.meta.fields(T);
     var i: u32 = index;
@@ -1987,3 +2048,7 @@ const IntegerBitSet = std.bit_set.IntegerBitSet;
 const InternPool = @import("../../InternPool.zig");
 const Mir = @This();
 const Register = bits.Register;
+const Emit = @import("Emit.zig");
+const codegen = @import("../../codegen.zig");
+const link = @import("../../link.zig");
+const Zcu = @import("../../Zcu.zig");
diff --git a/src/codegen.zig b/src/codegen.zig
index 2c2524257c..ea57aaf89c 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -182,7 +182,7 @@ pub fn emitFunction(
     /// in the pipeline. Any information needed to call emit must be stored in MIR.
     /// This is `undefined` if the backend supports the `separate_thread` feature.
     air: *const Air,
-) Allocator.Error!void {
+) CodeGenError!void {
     const zcu = pt.zcu;
     const func = zcu.funcInfo(func_index);
     const target = zcu.navFileScope(func.owner_nav).mod.?.resolved_target.result;
diff --git a/src/libs/freebsd.zig b/src/libs/freebsd.zig
index 98d4a42f91..d90ba974fc 100644
--- a/src/libs/freebsd.zig
+++ b/src/libs/freebsd.zig
@@ -985,7 +985,7 @@ fn queueSharedObjects(comp: *Compilation, so_files: BuiltSharedObjects) void {
     assert(comp.freebsd_so_files == null);
     comp.freebsd_so_files = so_files;
 
-    var task_buffer: [libs.len]link.Task = undefined;
+    var task_buffer: [libs.len]link.PrelinkTask = undefined;
     var task_buffer_i: usize = 0;
 
     {
diff --git a/src/libs/glibc.zig b/src/libs/glibc.zig
index c1146d933d..ed5eae377f 100644
--- a/src/libs/glibc.zig
+++ b/src/libs/glibc.zig
@@ -1148,7 +1148,7 @@ fn queueSharedObjects(comp: *Compilation, so_files: BuiltSharedObjects) void {
     assert(comp.glibc_so_files == null);
     comp.glibc_so_files = so_files;
 
-    var task_buffer: [libs.len]link.Task = undefined;
+    var task_buffer: [libs.len]link.PrelinkTask = undefined;
     var task_buffer_i: usize = 0;
 
     {
diff --git a/src/libs/netbsd.zig b/src/libs/netbsd.zig
index aab75cce49..7121c308f5 100644
--- a/src/libs/netbsd.zig
+++ b/src/libs/netbsd.zig
@@ -650,7 +650,7 @@ fn queueSharedObjects(comp: *Compilation, so_files: BuiltSharedObjects) void {
     assert(comp.netbsd_so_files == null);
     comp.netbsd_so_files = so_files;
 
-    var task_buffer: [libs.len]link.Task = undefined;
+    var task_buffer: [libs.len]link.PrelinkTask = undefined;
     var task_buffer_i: usize = 0;
 
     {
diff --git a/src/link.zig b/src/link.zig
index 31fd0a4a4e..838654775d 100644
--- a/src/link.zig
+++ b/src/link.zig
@@ -759,6 +759,8 @@ pub const File = struct {
         switch (base.tag) {
             .lld => unreachable,
             inline else => |tag| {
+                if (tag == .wasm) @panic("MLUGG TODO");
+                if (tag == .spirv) @panic("MLUGG TODO");
                 dev.check(tag.devFeature());
                 return @as(*tag.Type(), @fieldParentPtr("base", base)).updateFunc(pt, func_index, mir, maybe_undef_air);
             },
diff --git a/src/link/Coff.zig b/src/link/Coff.zig
index 9a040754ef..bb8faf583d 100644
--- a/src/link/Coff.zig
+++ b/src/link/Coff.zig
@@ -1057,8 +1057,10 @@ pub fn updateFunc(
     coff: *Coff,
     pt: Zcu.PerThread,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
+    mir: *const codegen.AnyMir,
+    /// This may be `undefined`; only pass it to `emitFunction`.
+    /// This parameter will eventually be removed.
+    maybe_undef_air: *const Air,
 ) link.File.UpdateNavError!void {
     if (build_options.skip_non_native and builtin.object_format != .coff) {
         @panic("Attempted to compile for object format that was disabled by build configuration");
@@ -1079,15 +1081,15 @@ pub fn updateFunc(
     var code_buffer: std.ArrayListUnmanaged(u8) = .empty;
     defer code_buffer.deinit(gpa);
 
-    try codegen.generateFunction(
+    try codegen.emitFunction(
         &coff.base,
         pt,
         zcu.navSrcLoc(nav_index),
         func_index,
-        air,
-        liveness,
+        mir,
         &code_buffer,
         .none,
+        maybe_undef_air,
     );
 
     try coff.updateNavCode(pt, nav_index, code_buffer.items, .FUNCTION);
diff --git a/src/link/Goff.zig b/src/link/Goff.zig
index 28da184495..d0c2b8e80b 100644
--- a/src/link/Goff.zig
+++ b/src/link/Goff.zig
@@ -13,6 +13,7 @@ const Path = std.Build.Cache.Path;
 const Zcu = @import("../Zcu.zig");
 const InternPool = @import("../InternPool.zig");
 const Compilation = @import("../Compilation.zig");
+const codegen = @import("../codegen.zig");
 const link = @import("../link.zig");
 const trace = @import("../tracy.zig").trace;
 const build_options = @import("build_options");
@@ -72,14 +73,14 @@ pub fn updateFunc(
     self: *Goff,
     pt: Zcu.PerThread,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
+    mir: *const codegen.AnyMir,
+    maybe_undef_air: *const Air,
 ) link.File.UpdateNavError!void {
     _ = self;
     _ = pt;
     _ = func_index;
-    _ = air;
-    _ = liveness;
+    _ = mir;
+    _ = maybe_undef_air;
     unreachable; // we always use llvm
 }
 
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 2c30b34215..8fd85df0a3 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -3051,13 +3051,13 @@ pub fn updateFunc(
     self: *MachO,
     pt: Zcu.PerThread,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
+    mir: *const codegen.AnyMir,
+    maybe_undef_air: *const Air,
 ) link.File.UpdateNavError!void {
     if (build_options.skip_non_native and builtin.object_format != .macho) {
         @panic("Attempted to compile for object format that was disabled by build configuration");
     }
-    return self.getZigObject().?.updateFunc(self, pt, func_index, air, liveness);
+    return self.getZigObject().?.updateFunc(self, pt, func_index, mir, maybe_undef_air);
 }
 
 pub fn updateNav(self: *MachO, pt: Zcu.PerThread, nav: InternPool.Nav.Index) link.File.UpdateNavError!void {
diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig
index 13ebb40cf9..f378a9c410 100644
--- a/src/link/MachO/ZigObject.zig
+++ b/src/link/MachO/ZigObject.zig
@@ -777,8 +777,10 @@ pub fn updateFunc(
     macho_file: *MachO,
     pt: Zcu.PerThread,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
+    mir: *const codegen.AnyMir,
+    /// This may be `undefined`; only pass it to `emitFunction`.
+    /// This parameter will eventually be removed.
+    maybe_undef_air: *const Air,
 ) link.File.UpdateNavError!void {
     const tracy = trace(@src());
     defer tracy.end();
@@ -796,15 +798,15 @@ pub fn updateFunc(
     var debug_wip_nav = if (self.dwarf) |*dwarf| try dwarf.initWipNav(pt, func.owner_nav, sym_index) else null;
     defer if (debug_wip_nav) |*wip_nav| wip_nav.deinit();
 
-    try codegen.generateFunction(
+    try codegen.emitFunction(
         &macho_file.base,
         pt,
         zcu.navSrcLoc(func.owner_nav),
         func_index,
-        air,
-        liveness,
+        mir,
         &code_buffer,
         if (debug_wip_nav) |*wip_nav| .{ .dwarf = wip_nav } else .none,
+        maybe_undef_air,
     );
     const code = code_buffer.items;
 
diff --git a/src/link/Plan9.zig b/src/link/Plan9.zig
index c487169b3f..0d0699f0f0 100644
--- a/src/link/Plan9.zig
+++ b/src/link/Plan9.zig
@@ -386,8 +386,10 @@ pub fn updateFunc(
     self: *Plan9,
     pt: Zcu.PerThread,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
+    mir: *const codegen.AnyMir,
+    /// This may be `undefined`; only pass it to `emitFunction`.
+    /// This parameter will eventually be removed.
+    maybe_undef_air: *const Air,
 ) link.File.UpdateNavError!void {
     if (build_options.skip_non_native and builtin.object_format != .plan9) {
         @panic("Attempted to compile for object format that was disabled by build configuration");
@@ -412,15 +414,15 @@ pub fn updateFunc(
     };
     defer dbg_info_output.dbg_line.deinit();
 
-    try codegen.generateFunction(
+    try codegen.emitFunction(
         &self.base,
         pt,
         zcu.navSrcLoc(func.owner_nav),
         func_index,
-        air,
-        liveness,
+        mir,
         &code_buffer,
         .{ .plan9 = &dbg_info_output },
+        maybe_undef_air,
     );
     const code = try code_buffer.toOwnedSlice(gpa);
     self.getAtomPtr(atom_idx).code = .{
diff --git a/src/link/Queue.zig b/src/link/Queue.zig
index c73a0e9684..3436be5921 100644
--- a/src/link/Queue.zig
+++ b/src/link/Queue.zig
@@ -97,8 +97,7 @@ pub fn mirReady(q: *Queue, comp: *Compilation, mir: *ZcuTask.LinkFunc.SharedMir)
         q.mutex.lock();
         defer q.mutex.unlock();
         switch (q.state) {
-            .finished => unreachable, // there's definitely a task queued
-            .running => return,
+            .finished, .running => return,
             .wait_for_mir => |wait_for| if (wait_for != mir) return,
         }
         // We were waiting for `mir`, so we will restart the linker thread.
diff --git a/src/link/Xcoff.zig b/src/link/Xcoff.zig
index 7fe714ce6e..97ea300ed2 100644
--- a/src/link/Xcoff.zig
+++ b/src/link/Xcoff.zig
@@ -13,6 +13,7 @@ const Path = std.Build.Cache.Path;
 const Zcu = @import("../Zcu.zig");
 const InternPool = @import("../InternPool.zig");
 const Compilation = @import("../Compilation.zig");
+const codegen = @import("../codegen.zig");
 const link = @import("../link.zig");
 const trace = @import("../tracy.zig").trace;
 const build_options = @import("build_options");
@@ -72,14 +73,14 @@ pub fn updateFunc(
     self: *Xcoff,
     pt: Zcu.PerThread,
     func_index: InternPool.Index,
-    air: Air,
-    liveness: Air.Liveness,
+    mir: *const codegen.AnyMir,
+    maybe_undef_air: *const Air,
 ) link.File.UpdateNavError!void {
     _ = self;
     _ = pt;
     _ = func_index;
-    _ = air;
-    _ = liveness;
+    _ = mir;
+    _ = maybe_undef_air;
     unreachable; // we always use llvm
 }
 
-- 
cgit v1.2.3


From db5d85b8c89b755bd8865def3bd7114d5d9d4867 Mon Sep 17 00:00:00 2001
From: mlugg <mlugg@mlugg.co.uk>
Date: Sun, 8 Jun 2025 21:47:29 +0100
Subject: compiler: improve progress output

* "Flush" nodes ("LLVM Emit Object", "ELF Flush") appear under "Linking"

* "Code Generation" disappears when all analysis and codegen is done

* We only show one node under "Semantic Analysis" to accurately convey
  that analysis isn't happening in parallel, but rather that we're
  pausing one task to do another
---
 lib/std/Progress.zig  | 22 +++++++++++++++++++
 src/Compilation.zig   | 58 ++++++++++++++++++++++++++++++++-------------------
 src/Zcu.zig           | 38 +++++++++++++++++++++++++++++++--
 src/Zcu/PerThread.zig | 22 ++++++++++++-------
 src/link.zig          | 22 ++++++++++++-------
 src/link/Lld.zig      |  3 +++
 src/link/Queue.zig    |  2 +-
 7 files changed, 126 insertions(+), 41 deletions(-)

(limited to 'src/link/Queue.zig')

diff --git a/lib/std/Progress.zig b/lib/std/Progress.zig
index d9ff03a3fe..030f3f0a28 100644
--- a/lib/std/Progress.zig
+++ b/lib/std/Progress.zig
@@ -234,6 +234,28 @@ pub const Node = struct {
         _ = @atomicRmw(u32, &storage.completed_count, .Add, 1, .monotonic);
     }
 
+    /// Thread-safe. `new_name` is cut at its first 0 (NUL) byte, and at most `max_name_len` bytes are stored.
+    pub fn setName(n: Node, new_name: []const u8) void {
+        const index = n.index.unwrap() orelse return;
+        const storage = storageByIndex(index);
+
+        const name_len = @min(max_name_len, std.mem.indexOfScalar(u8, new_name, 0) orelse new_name.len);
+
+        copyAtomicStore(storage.name[0..name_len], new_name[0..name_len]);
+        if (name_len < storage.name.len)
+            @atomicStore(u8, &storage.name[name_len], 0, .monotonic);
+    }
+
+    /// Gets the name of this `Node`.
+    /// A pointer to this array can later be passed to `setName` to restore the name.
+    pub fn getName(n: Node) [max_name_len]u8 {
+        var dest: [max_name_len]u8 align(@alignOf(usize)) = undefined;
+        if (n.index.unwrap()) |index| {
+            copyAtomicLoad(&dest, &storageByIndex(index).name);
+        }
+        return dest;
+    }
+
     /// Thread-safe.
     pub fn setCompletedItems(n: Node, completed_items: usize) void {
         const index = n.index.unwrap() orelse return;
diff --git a/src/Compilation.zig b/src/Compilation.zig
index fe4671848d..74f841723e 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -255,7 +255,7 @@ test_filters: []const []const u8,
 test_name_prefix: ?[]const u8,
 
 link_task_wait_group: WaitGroup = .{},
-work_queue_progress_node: std.Progress.Node = .none,
+link_prog_node: std.Progress.Node = std.Progress.Node.none,
 
 llvm_opt_bisect_limit: c_int,
 
@@ -2795,6 +2795,17 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
         }
     }
 
+    // The linker progress node is set up here instead of in `performAllTheWork`, because
+    // we also want it around during `flush`.
+    const have_link_node = comp.bin_file != null;
+    if (have_link_node) {
+        comp.link_prog_node = main_progress_node.start("Linking", 0);
+    }
+    defer if (have_link_node) {
+        comp.link_prog_node.end();
+        comp.link_prog_node = .none;
+    };
+
     try comp.performAllTheWork(main_progress_node);
 
     if (comp.zcu) |zcu| {
@@ -2843,7 +2854,7 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
 
     switch (comp.cache_use) {
         .none, .incremental => {
-            try flush(comp, arena, .main, main_progress_node);
+            try flush(comp, arena, .main);
         },
         .whole => |whole| {
             if (comp.file_system_inputs) |buf| try man.populateFileSystemInputs(buf);
@@ -2919,7 +2930,7 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
                 }
             }
 
-            try flush(comp, arena, .main, main_progress_node);
+            try flush(comp, arena, .main);
 
             // Calling `flush` may have produced errors, in which case the
             // cache manifest must not be written.
@@ -3009,13 +3020,12 @@ fn flush(
     comp: *Compilation,
     arena: Allocator,
     tid: Zcu.PerThread.Id,
-    prog_node: std.Progress.Node,
 ) !void {
     if (comp.zcu) |zcu| {
         if (zcu.llvm_object) |llvm_object| {
             // Emit the ZCU object from LLVM now; it's required to flush the output file.
             // If there's an output file, it wants to decide where the LLVM object goes!
-            const sub_prog_node = prog_node.start("LLVM Emit Object", 0);
+            const sub_prog_node = comp.link_prog_node.start("LLVM Emit Object", 0);
             defer sub_prog_node.end();
             try llvm_object.emit(.{
                 .pre_ir_path = comp.verbose_llvm_ir,
@@ -3053,7 +3063,7 @@ fn flush(
     }
     if (comp.bin_file) |lf| {
         // This is needed before reading the error flags.
-        lf.flush(arena, tid, prog_node) catch |err| switch (err) {
+        lf.flush(arena, tid, comp.link_prog_node) catch |err| switch (err) {
             error.LinkFailure => {}, // Already reported.
             error.OutOfMemory => return error.OutOfMemory,
         };
@@ -4172,28 +4182,15 @@ pub fn addWholeFileError(
     }
 }
 
-pub fn performAllTheWork(
+fn performAllTheWork(
     comp: *Compilation,
     main_progress_node: std.Progress.Node,
 ) JobError!void {
-    comp.work_queue_progress_node = main_progress_node;
-    defer comp.work_queue_progress_node = .none;
-
+    // Regardless of errors, `comp.zcu` needs to update its generation number.
     defer if (comp.zcu) |zcu| {
-        zcu.sema_prog_node.end();
-        zcu.sema_prog_node = .none;
-        zcu.codegen_prog_node.end();
-        zcu.codegen_prog_node = .none;
-
         zcu.generation += 1;
     };
-    try comp.performAllTheWorkInner(main_progress_node);
-}
 
-fn performAllTheWorkInner(
-    comp: *Compilation,
-    main_progress_node: std.Progress.Node,
-) JobError!void {
     // Here we queue up all the AstGen tasks first, followed by C object compilation.
     // We wait until the AstGen tasks are all completed before proceeding to the
     // (at least for now) single-threaded main work queue. However, C object compilation
@@ -4513,8 +4510,24 @@ fn performAllTheWorkInner(
         }
 
         zcu.sema_prog_node = main_progress_node.start("Semantic Analysis", 0);
-        zcu.codegen_prog_node = if (comp.bin_file != null) main_progress_node.start("Code Generation", 0) else .none;
+        if (comp.bin_file != null) {
+            zcu.codegen_prog_node = main_progress_node.start("Code Generation", 0);
+        }
+        // We increment `pending_codegen_jobs` so that it doesn't reach 0 until after analysis finishes.
+        // That prevents the "Code Generation" node from constantly disappearing and reappearing when
+        // we're probably going to analyze more functions at some point.
+        assert(zcu.pending_codegen_jobs.swap(1, .monotonic) == 0); // don't let this become 0 until analysis finishes
     }
+    // When analysis ends, delete the progress nodes for "Semantic Analysis" and possibly "Code Generation".
+    defer if (comp.zcu) |zcu| {
+        zcu.sema_prog_node.end();
+        zcu.sema_prog_node = .none;
+        if (zcu.pending_codegen_jobs.rmw(.Sub, 1, .monotonic) == 1) {
+            // Decremented to 0, so all done.
+            zcu.codegen_prog_node.end();
+            zcu.codegen_prog_node = .none;
+        }
+    };
 
     if (!comp.separateCodegenThreadOk()) {
         // Waits until all input files have been parsed.
@@ -4583,6 +4596,7 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void {
                 .status = .init(.pending),
                 .value = undefined,
             };
+            assert(zcu.pending_codegen_jobs.rmw(.Add, 1, .monotonic) > 0); // the "Code Generation" node hasn't been ended
             if (comp.separateCodegenThreadOk()) {
                 // `workerZcuCodegen` takes ownership of `air`.
                 comp.thread_pool.spawnWgId(&comp.link_task_wait_group, workerZcuCodegen, .{ comp, func.func, air, shared_mir });
diff --git a/src/Zcu.zig b/src/Zcu.zig
index 91d2c0ffff..513492e818 100644
--- a/src/Zcu.zig
+++ b/src/Zcu.zig
@@ -66,8 +66,18 @@ root_mod: *Package.Module,
 /// `root_mod` is the test runner, and `main_mod` is the user's source file which has the tests.
 main_mod: *Package.Module,
 std_mod: *Package.Module,
-sema_prog_node: std.Progress.Node = std.Progress.Node.none,
-codegen_prog_node: std.Progress.Node = std.Progress.Node.none,
+sema_prog_node: std.Progress.Node = .none,
+codegen_prog_node: std.Progress.Node = .none,
+/// The number of codegen jobs which are pending or in-progress. Whichever thread drops this value
+/// to 0 is responsible for ending `codegen_prog_node`. While semantic analysis is happening, this
+/// value bottoms out at 1 instead of 0, to ensure that it can only drop to 0 after analysis is
+/// completed (since semantic analysis could trigger more codegen work).
+pending_codegen_jobs: std.atomic.Value(u32) = .init(0),
+
+/// This is the progress node *under* `sema_prog_node` which is currently running.
+/// When we have to pause to analyze something else, we just temporarily rename this node.
+/// Eventually, when we thread semantic analysis, we will want one of these per thread.
+cur_sema_prog_node: std.Progress.Node = .none,
 
 /// Used by AstGen worker to load and store ZIR cache.
 global_zir_cache: Cache.Directory,
@@ -4753,3 +4763,27 @@ fn explainWhyFileIsInModule(
         import = importer_ref.import;
     }
 }
+
+const SemaProgNode = struct {
+    /// `null` means `startSemaProgNode` created the node, so `end` must end it; otherwise, the name for `end` to restore.
+    old_name: ?[std.Progress.Node.max_name_len]u8,
+    pub fn end(spn: SemaProgNode, zcu: *Zcu) void {
+        if (spn.old_name) |old_name| {
+            zcu.sema_prog_node.completeOne(); // we're just renaming, but it's effectively completion
+            zcu.cur_sema_prog_node.setName(&old_name);
+        } else {
+            zcu.cur_sema_prog_node.end();
+            zcu.cur_sema_prog_node = .none;
+        }
+    }
+};
+pub fn startSemaProgNode(zcu: *Zcu, name: []const u8) SemaProgNode {
+    if (zcu.cur_sema_prog_node.index != .none) {
+        const old_name = zcu.cur_sema_prog_node.getName();
+        zcu.cur_sema_prog_node.setName(name);
+        return .{ .old_name = old_name };
+    } else {
+        zcu.cur_sema_prog_node = zcu.sema_prog_node.start(name, 0);
+        return .{ .old_name = null };
+    }
+}
diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig
index f8efa40dc0..8bc723f2e8 100644
--- a/src/Zcu/PerThread.zig
+++ b/src/Zcu/PerThread.zig
@@ -796,8 +796,8 @@ pub fn ensureComptimeUnitUpToDate(pt: Zcu.PerThread, cu_id: InternPool.ComptimeU
         info.deps.clearRetainingCapacity();
     }
 
-    const unit_prog_node = zcu.sema_prog_node.start("comptime", 0);
-    defer unit_prog_node.end();
+    const unit_prog_node = zcu.startSemaProgNode("comptime");
+    defer unit_prog_node.end(zcu);
 
     return pt.analyzeComptimeUnit(cu_id) catch |err| switch (err) {
         error.AnalysisFail => {
@@ -976,8 +976,8 @@ pub fn ensureNavValUpToDate(pt: Zcu.PerThread, nav_id: InternPool.Nav.Index) Zcu
         info.deps.clearRetainingCapacity();
     }
 
-    const unit_prog_node = zcu.sema_prog_node.start(nav.fqn.toSlice(ip), 0);
-    defer unit_prog_node.end();
+    const unit_prog_node = zcu.startSemaProgNode(nav.fqn.toSlice(ip));
+    defer unit_prog_node.end(zcu);
 
     const invalidate_value: bool, const new_failed: bool = if (pt.analyzeNavVal(nav_id)) |result| res: {
         break :res .{
@@ -1396,8 +1396,8 @@ pub fn ensureNavTypeUpToDate(pt: Zcu.PerThread, nav_id: InternPool.Nav.Index) Zc
         info.deps.clearRetainingCapacity();
     }
 
-    const unit_prog_node = zcu.sema_prog_node.start(nav.fqn.toSlice(ip), 0);
-    defer unit_prog_node.end();
+    const unit_prog_node = zcu.startSemaProgNode(nav.fqn.toSlice(ip));
+    defer unit_prog_node.end(zcu);
 
     const invalidate_type: bool, const new_failed: bool = if (pt.analyzeNavType(nav_id)) |result| res: {
         break :res .{
@@ -1617,8 +1617,8 @@ pub fn ensureFuncBodyUpToDate(pt: Zcu.PerThread, maybe_coerced_func_index: Inter
         info.deps.clearRetainingCapacity();
     }
 
-    const func_prog_node = zcu.sema_prog_node.start(ip.getNav(func.owner_nav).fqn.toSlice(ip), 0);
-    defer func_prog_node.end();
+    const func_prog_node = zcu.startSemaProgNode(ip.getNav(func.owner_nav).fqn.toSlice(ip));
+    defer func_prog_node.end(zcu);
 
     const ies_outdated, const new_failed = if (pt.analyzeFuncBody(func_index)) |result|
         .{ prev_failed or result.ies_outdated, false }
@@ -3360,6 +3360,7 @@ pub fn populateTestFunctions(
         ip.mutateVarInit(test_fns_val.toIntern(), new_init);
     }
     {
+        assert(zcu.codegen_prog_node.index == .none);
         zcu.codegen_prog_node = main_progress_node.start("Code Generation", 0);
         defer {
             zcu.codegen_prog_node.end();
@@ -4393,6 +4394,11 @@ pub fn runCodegen(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air, ou
         },
     }
     zcu.comp.link_task_queue.mirReady(zcu.comp, out);
+    if (zcu.pending_codegen_jobs.rmw(.Sub, 1, .monotonic) == 1) {
+        // Decremented to 0, so all done.
+        zcu.codegen_prog_node.end();
+        zcu.codegen_prog_node = .none;
+    }
 }
 fn runCodegenInner(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air) error{
     OutOfMemory,
diff --git a/src/link.zig b/src/link.zig
index 844ea7a85c..7d522b94d3 100644
--- a/src/link.zig
+++ b/src/link.zig
@@ -1074,7 +1074,7 @@ pub const File = struct {
 
     /// Called when all linker inputs have been sent via `loadInput`. After
     /// this, `loadInput` will not be called anymore.
-    pub fn prelink(base: *File, prog_node: std.Progress.Node) FlushError!void {
+    pub fn prelink(base: *File) FlushError!void {
         assert(!base.post_prelink);
 
         // In this case, an object file is created by the LLVM backend, so
@@ -1085,7 +1085,7 @@ pub const File = struct {
         switch (base.tag) {
             inline .wasm => |tag| {
                 dev.check(tag.devFeature());
-                return @as(*tag.Type(), @fieldParentPtr("base", base)).prelink(prog_node);
+                return @as(*tag.Type(), @fieldParentPtr("base", base)).prelink(base.comp.link_prog_node);
             },
             else => {},
         }
@@ -1293,7 +1293,7 @@ pub fn doPrelinkTask(comp: *Compilation, task: PrelinkTask) void {
     const base = comp.bin_file orelse return;
     switch (task) {
         .load_explicitly_provided => {
-            const prog_node = comp.work_queue_progress_node.start("Parse Linker Inputs", comp.link_inputs.len);
+            const prog_node = comp.link_prog_node.start("Parse Inputs", comp.link_inputs.len);
             defer prog_node.end();
             for (comp.link_inputs) |input| {
                 base.loadInput(input) catch |err| switch (err) {
@@ -1310,7 +1310,7 @@ pub fn doPrelinkTask(comp: *Compilation, task: PrelinkTask) void {
             }
         },
         .load_host_libc => {
-            const prog_node = comp.work_queue_progress_node.start("Linker Parse Host libc", 0);
+            const prog_node = comp.link_prog_node.start("Parse Host libc", 0);
             defer prog_node.end();
 
             const target = comp.root_mod.resolved_target.result;
@@ -1369,7 +1369,7 @@ pub fn doPrelinkTask(comp: *Compilation, task: PrelinkTask) void {
             }
         },
         .load_object => |path| {
-            const prog_node = comp.work_queue_progress_node.start("Linker Parse Object", 0);
+            const prog_node = comp.link_prog_node.start("Parse Object", 0);
             defer prog_node.end();
             base.openLoadObject(path) catch |err| switch (err) {
                 error.LinkFailure => return, // error reported via diags
@@ -1377,7 +1377,7 @@ pub fn doPrelinkTask(comp: *Compilation, task: PrelinkTask) void {
             };
         },
         .load_archive => |path| {
-            const prog_node = comp.work_queue_progress_node.start("Linker Parse Archive", 0);
+            const prog_node = comp.link_prog_node.start("Parse Archive", 0);
             defer prog_node.end();
             base.openLoadArchive(path, null) catch |err| switch (err) {
                 error.LinkFailure => return, // error reported via link_diags
@@ -1385,7 +1385,7 @@ pub fn doPrelinkTask(comp: *Compilation, task: PrelinkTask) void {
             };
         },
         .load_dso => |path| {
-            const prog_node = comp.work_queue_progress_node.start("Linker Parse Shared Library", 0);
+            const prog_node = comp.link_prog_node.start("Parse Shared Library", 0);
             defer prog_node.end();
             base.openLoadDso(path, .{
                 .preferred_mode = .dynamic,
@@ -1396,7 +1396,7 @@ pub fn doPrelinkTask(comp: *Compilation, task: PrelinkTask) void {
             };
         },
         .load_input => |input| {
-            const prog_node = comp.work_queue_progress_node.start("Linker Parse Input", 0);
+            const prog_node = comp.link_prog_node.start("Parse Input", 0);
             defer prog_node.end();
             base.loadInput(input) catch |err| switch (err) {
                 error.LinkFailure => return, // error reported via link_diags
@@ -1418,6 +1418,9 @@ pub fn doZcuTask(comp: *Compilation, tid: usize, task: ZcuTask) void {
             const zcu = comp.zcu.?;
             const pt: Zcu.PerThread = .activate(zcu, @enumFromInt(tid));
             defer pt.deactivate();
+            const fqn_slice = zcu.intern_pool.getNav(nav_index).fqn.toSlice(&zcu.intern_pool);
+            const nav_prog_node = comp.link_prog_node.start(fqn_slice, 0);
+            defer nav_prog_node.end();
             if (zcu.llvm_object) |llvm_object| {
                 llvm_object.updateNav(pt, nav_index) catch |err| switch (err) {
                     error.OutOfMemory => diags.setAllocFailure(),
@@ -1441,6 +1444,9 @@ pub fn doZcuTask(comp: *Compilation, tid: usize, task: ZcuTask) void {
             const nav = zcu.funcInfo(func.func).owner_nav;
             const pt: Zcu.PerThread = .activate(zcu, @enumFromInt(tid));
             defer pt.deactivate();
+            const fqn_slice = zcu.intern_pool.getNav(nav).fqn.toSlice(&zcu.intern_pool);
+            const nav_prog_node = comp.link_prog_node.start(fqn_slice, 0);
+            defer nav_prog_node.end();
             switch (func.mir.status.load(.monotonic)) {
                 .pending => unreachable,
                 .ready => {},
diff --git a/src/link/Lld.zig b/src/link/Lld.zig
index dd50bd2a2f..4ea809428e 100644
--- a/src/link/Lld.zig
+++ b/src/link/Lld.zig
@@ -267,6 +267,9 @@ pub fn flush(
 
     const comp = lld.base.comp;
     const result = if (comp.config.output_mode == .Lib and comp.config.link_mode == .static) r: {
+        if (!@import("build_options").have_llvm or !comp.config.use_lib_llvm) {
+            return lld.base.comp.link_diags.fail("using lld without libllvm not implemented", .{});
+        }
         break :r linkAsArchive(lld, arena);
     } else switch (lld.ofmt) {
         .coff => coffLink(lld, arena),
diff --git a/src/link/Queue.zig b/src/link/Queue.zig
index 3436be5921..ab5fd89699 100644
--- a/src/link/Queue.zig
+++ b/src/link/Queue.zig
@@ -180,7 +180,7 @@ fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
     // We've finished the prelink tasks, so run prelink if necessary.
     if (comp.bin_file) |lf| {
         if (!lf.post_prelink) {
-            if (lf.prelink(comp.work_queue_progress_node)) |_| {
+            if (lf.prelink()) |_| {
                 lf.post_prelink = true;
             } else |err| switch (err) {
                 error.OutOfMemory => comp.link_diags.setAllocFailure(),
-- 
cgit v1.2.3


From ff89a98c50dbf826564657e7f98cc56194add163 Mon Sep 17 00:00:00 2001
From: mlugg <mlugg@mlugg.co.uk>
Date: Wed, 11 Jun 2025 02:25:55 +0100
Subject: link.Queue: release safety lock before releasing mutex after stopping

---
 src/link/Queue.zig | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/link/Queue.zig')

diff --git a/src/link/Queue.zig b/src/link/Queue.zig
index ab5fd89699..16ee701771 100644
--- a/src/link/Queue.zig
+++ b/src/link/Queue.zig
@@ -147,8 +147,7 @@ pub fn enqueueZcu(q: *Queue, comp: *Compilation, task: ZcuTask) Allocator.Error!
 }
 
 fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
-    q.flush_safety.lock();
-    defer q.flush_safety.unlock();
+    q.flush_safety.lock(); // every `return` site should unlock this before unlocking `q.mutex`
 
     if (std.debug.runtime_safety) {
         q.mutex.lock();
@@ -167,6 +166,7 @@ fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
                 } else {
                     // We're expecting more prelink tasks so can't move on to ZCU tasks.
                     q.state = .finished;
+                    q.flush_safety.unlock();
                     return;
                 }
             }
@@ -200,6 +200,7 @@ fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
             if (q.wip_zcu.items.len == 0) {
                 // We've exhausted all available tasks.
                 q.state = .finished;
+                q.flush_safety.unlock();
                 return;
             }
         }
@@ -215,6 +216,7 @@ fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
             if (status_ptr.load(.monotonic) != .pending) break :pending;
             // We will stop for now, and get restarted once this MIR is ready.
             q.state = .{ .wait_for_mir = task.link_func.mir };
+            q.flush_safety.unlock();
             return;
         }
         link.doZcuTask(comp, tid, task);
-- 
cgit v1.2.3


From 5bb5aaf932b8ed30aebfbb0036e1532abfc6af46 Mon Sep 17 00:00:00 2001
From: mlugg <mlugg@mlugg.co.uk>
Date: Thu, 12 Jun 2025 09:56:37 +0100
Subject: compiler: don't queue too much AIR/MIR

Without this cap, unlucky scheduling and/or details of what pipeline
stages perform best on the host machine could cause many gigabytes of
MIR to be stuck in the queue. At a certain point, pause the main thread
until some of the functions in flight have been processed.
---
 src/Compilation.zig |  6 ++++++
 src/link.zig        |  5 +++++
 src/link/Queue.zig  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)

(limited to 'src/link/Queue.zig')

diff --git a/src/Compilation.zig b/src/Compilation.zig
index 9f851cf135..ad184b2bc9 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -4607,12 +4607,17 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void {
             };
             assert(zcu.pending_codegen_jobs.rmw(.Add, 1, .monotonic) > 0); // the "Code Generation" node hasn't been ended
             zcu.codegen_prog_node.increaseEstimatedTotalItems(1);
+            // This value is used as a heuristic to avoid queueing too much AIR/MIR at once (hence
+            // using a lot of memory). If this would cause too many AIR bytes to be in-flight, we
+            // will block on the `dispatchZcuLinkTask` call below.
+            const air_bytes: u32 = @intCast(air.instructions.len * 5 + air.extra.items.len * 4);
             if (comp.separateCodegenThreadOk()) {
                 // `workerZcuCodegen` takes ownership of `air`.
                 comp.thread_pool.spawnWgId(&comp.link_task_wait_group, workerZcuCodegen, .{ comp, func.func, air, shared_mir });
                 comp.dispatchZcuLinkTask(tid, .{ .link_func = .{
                     .func = func.func,
                     .mir = shared_mir,
+                    .air_bytes = air_bytes,
                 } });
             } else {
                 {
@@ -4624,6 +4629,7 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void {
                 comp.dispatchZcuLinkTask(tid, .{ .link_func = .{
                     .func = func.func,
                     .mir = shared_mir,
+                    .air_bytes = air_bytes,
                 } });
                 air.deinit(gpa);
             }
diff --git a/src/link.zig b/src/link.zig
index ce98ac8929..9bed6b4131 100644
--- a/src/link.zig
+++ b/src/link.zig
@@ -1267,6 +1267,11 @@ pub const ZcuTask = union(enum) {
         /// the codegen job to ensure that the linker receives functions in a deterministic order,
         /// allowing reproducible builds.
         mir: *SharedMir,
+        /// This is not actually used by `doZcuTask`. Instead, `Queue` uses this value as a heuristic
+        /// to avoid queueing too much AIR/MIR for codegen/link at a time. Essentially, we cap the
+        /// total number of AIR bytes which are being processed at once, preventing unbounded memory
+        /// usage when AIR is produced faster than it is processed.
+        air_bytes: u32,
 
         pub const SharedMir = struct {
             /// This is initially `.pending`. When `value` is populated, the codegen thread will set
diff --git a/src/link/Queue.zig b/src/link/Queue.zig
index 16ee701771..d197edab02 100644
--- a/src/link/Queue.zig
+++ b/src/link/Queue.zig
@@ -39,6 +39,21 @@ wip_zcu: std.ArrayListUnmanaged(ZcuTask),
 /// index into `wip_zcu` which we have reached.
 wip_zcu_idx: usize,
 
+/// The sum of all `air_bytes` for all currently-queued `ZcuTask.link_func` tasks. Because
+/// MIR bytes are approximately proportional to AIR bytes, this acts to limit the amount of
+/// AIR and MIR which is queued for codegen and link respectively, to prevent excessive
+/// memory usage if analysis produces AIR faster than it can be processed by codegen/link.
+/// The cap is `max_air_bytes_in_flight`.
+/// Guarded by `mutex`.
+air_bytes_in_flight: u32,
+/// If nonzero, a call to `enqueueZcu` is blocked waiting to add a `link_func` task with this many AIR
+/// bytes, which it cannot do until `air_bytes_in_flight <= max_air_bytes_in_flight -| air_bytes_waiting`.
+/// Guarded by `mutex`.
+air_bytes_waiting: u32,
+/// After setting `air_bytes_waiting`, `enqueueZcu` will wait on this condition (with `mutex`).
+/// When `air_bytes_waiting` many bytes can be queued, this condition should be signaled.
+air_bytes_cond: std.Thread.Condition,
+
 /// Guarded by `mutex`.
 state: union(enum) {
     /// The link thread is currently running or queued to run.
@@ -52,6 +67,11 @@ state: union(enum) {
     wait_for_mir: *ZcuTask.LinkFunc.SharedMir,
 },
 
+/// In the worst observed case, MIR is around 50 times as large as AIR. More typically, the ratio is
+/// around 20. Going by that 50x multiplier, and assuming we want to consume no more than 500 MiB of
+/// memory on AIR/MIR, we see a limit of around 10 MiB of AIR in-flight.
+const max_air_bytes_in_flight = 10 * 1024 * 1024;
+
 /// The initial `Queue` state, containing no tasks, expecting no prelink tasks, and with no running worker thread.
 /// The `pending_prelink_tasks` and `queued_prelink` fields may be modified as needed before calling `start`.
 pub const empty: Queue = .{
@@ -64,6 +84,9 @@ pub const empty: Queue = .{
     .wip_zcu = .empty,
     .wip_zcu_idx = 0,
     .state = .finished,
+    .air_bytes_in_flight = 0,
+    .air_bytes_waiting = 0,
+    .air_bytes_cond = .{},
 };
 /// `lf` is needed to correctly deinit any pending `ZcuTask`s.
 pub fn deinit(q: *Queue, comp: *Compilation) void {
@@ -131,6 +154,16 @@ pub fn enqueueZcu(q: *Queue, comp: *Compilation, task: ZcuTask) Allocator.Error!
     {
         q.mutex.lock();
         defer q.mutex.unlock();
+        // If this is a `link_func` task, we might need to wait for `air_bytes_in_flight` to fall.
+        if (task == .link_func) {
+            const max_in_flight = max_air_bytes_in_flight -| task.link_func.air_bytes;
+            while (q.air_bytes_in_flight > max_in_flight) {
+                q.air_bytes_waiting = task.link_func.air_bytes;
+                q.air_bytes_cond.wait(&q.mutex);
+                q.air_bytes_waiting = 0;
+            }
+            q.air_bytes_in_flight += task.link_func.air_bytes;
+        }
         try q.queued_zcu.append(comp.gpa, task);
         switch (q.state) {
             .running, .wait_for_mir => return,
@@ -221,6 +254,17 @@ fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
         }
         link.doZcuTask(comp, tid, task);
         task.deinit(comp.zcu.?);
+        if (task == .link_func) {
+            // Decrease `air_bytes_in_flight`, since we've finished processing this MIR.
+            q.mutex.lock();
+            defer q.mutex.unlock();
+            q.air_bytes_in_flight -= task.link_func.air_bytes;
+            if (q.air_bytes_waiting != 0 and
+                q.air_bytes_in_flight <= max_air_bytes_in_flight -| q.air_bytes_waiting)
+            {
+                q.air_bytes_cond.signal();
+            }
+        }
         q.wip_zcu_idx += 1;
     }
 }
-- 
cgit v1.2.3