| author | mlugg <mlugg@mlugg.co.uk> | 2025-08-05 22:48:50 +0100 |
|---|---|---|
| committer | Matthew Lugg <mlugg@mlugg.co.uk> | 2025-08-06 21:50:16 +0100 |
| commit | e17a050bc695f7d117b89adb1d258813593ca111 (patch) | |
| tree | f281876485848c783fb9e2a10616b98cda0b2e09 /src/link/Queue.zig | |
| parent | 3de8bbd3d4e262df11a582fb52401b8077b5f352 (diff) | |
link: prevent deadlock when prelink tasks fail
If an error occurred which prevented a prelink task from being queued,
then `pending_prelink_tasks` would never be decremented, which could
cause deadlocks in some cases. So, instead of calculating ahead of time
the number of prelink tasks to expect, we use a simpler strategy, much
like a wait group: we add 1 to a counter when we spawn a worker, and
`defer` decrementing it in the worker function. The counter's initial
value is 1, and there is a final decrement after all of the workers have
been spawned, so once it hits 0, prelink is done (be it with a failure
or a success).
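
As a rough sketch of that counting scheme (all names here are invented for illustration; this is not the actual `src/link/Queue.zig` API, and the sketch is single-threaded so the mutex is omitted):

```zig
const std = @import("std");

/// Illustrative counter; it starts at 1, meaning "spawning is still in progress".
const PrelinkCounter = struct {
    count: u32 = 1,

    fn spawnWorker(c: *PrelinkCounter, fail_early: bool) void {
        c.count += 1; // +1 before the worker runs
        c.runWorker(fail_early);
    }

    fn runWorker(c: *PrelinkCounter, fail_early: bool) void {
        defer c.count -= 1; // runs on every path, even when no task is ever queued
        if (fail_early) return; // simulate the error case that used to deadlock
        // ... queue the prelink task ...
    }
};

test "counter reaches zero even when a worker fails" {
    var c: PrelinkCounter = .{};
    c.spawnWorker(true); // fails before queueing anything
    c.spawnWorker(false);
    c.count -= 1; // the final decrement: all workers have been spawned
    try std.testing.expectEqual(@as(u32, 0), c.count);
}
```

The invariant is that the counter can only reach 0 after the final decrement and after every spawned worker has run its `defer`; nothing depends on predicting the task count up front.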
Diffstat (limited to 'src/link/Queue.zig')
| -rw-r--r-- | src/link/Queue.zig | 58 |
1 file changed, 45 insertions(+), 13 deletions(-)
```diff
diff --git a/src/link/Queue.zig b/src/link/Queue.zig
index d1595636ac..9f4535e1fe 100644
--- a/src/link/Queue.zig
+++ b/src/link/Queue.zig
@@ -16,9 +16,9 @@
 mutex: std.Thread.Mutex,
 /// Validates that only one `flushTaskQueue` thread is running at a time.
 flush_safety: std.debug.SafetyLock,
-/// This is the number of prelink tasks which are expected but have not yet been enqueued.
-/// Guarded by `mutex`.
-pending_prelink_tasks: u32,
+/// This value is positive while there are still prelink tasks yet to be queued. Once they are
+/// all queued, this value becomes 0, and ZCU tasks can be run. Guarded by `mutex`.
+prelink_wait_count: u32,
 
 /// Prelink tasks which have been enqueued and are not yet owned by the worker thread.
 /// Allocated into `gpa`, guarded by `mutex`.
@@ -59,7 +59,7 @@ state: union(enum) {
     /// The link thread is currently running or queued to run.
     running,
     /// The link thread is not running or queued, because it has exhausted all immediately available
-    /// tasks. It should be spawned when more tasks are enqueued. If `pending_prelink_tasks` is not
+    /// tasks. It should be spawned when more tasks are enqueued. If `prelink_wait_count` is not
     /// zero, we are specifically waiting for prelink tasks.
     finished,
     /// The link thread is not running or queued, because it is waiting for this MIR to be populated.
@@ -73,11 +73,11 @@ state: union(enum) {
 const max_air_bytes_in_flight = 10 * 1024 * 1024;
 
 /// The initial `Queue` state, containing no tasks, expecting no prelink tasks, and with no running worker thread.
-/// The `pending_prelink_tasks` and `queued_prelink` fields may be modified as needed before calling `start`.
+/// The `queued_prelink` field may be appended to before calling `start`.
 pub const empty: Queue = .{
     .mutex = .{},
     .flush_safety = .{},
-    .pending_prelink_tasks = 0,
+    .prelink_wait_count = undefined, // set in `start`
     .queued_prelink = .empty,
     .wip_prelink = .empty,
     .queued_zcu = .empty,
@@ -100,17 +100,49 @@ pub fn deinit(q: *Queue, comp: *Compilation) void {
 }
 
 /// This is expected to be called exactly once, after which the caller must not directly access
-/// `queued_prelink` or `pending_prelink_tasks` any longer. This will spawn the link thread if
-/// necessary.
+/// `queued_prelink` any longer. This will spawn the link thread if necessary.
 pub fn start(q: *Queue, comp: *Compilation) void {
     assert(q.state == .finished);
     assert(q.queued_zcu.items.len == 0);
+    // Reset this to 1. We can't init it to 1 in `empty`, because it would fall to 0 on successive
+    // incremental updates, but we still need the initial 1.
+    q.prelink_wait_count = 1;
     if (q.queued_prelink.items.len != 0) {
         q.state = .running;
         comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
     }
 }
 
+/// Every call to this must be paired with a call to `finishPrelinkItem`.
+pub fn startPrelinkItem(q: *Queue) void {
+    q.mutex.lock();
+    defer q.mutex.unlock();
+    assert(q.prelink_wait_count > 0); // must not have finished everything already
+    q.prelink_wait_count += 1;
+}
+
+/// This function must be called exactly one more time than `startPrelinkItem` is. The final call
+/// indicates that we have finished calling `startPrelinkItem`, so once all pending items finish,
+/// we are ready to move on to ZCU tasks.
+pub fn finishPrelinkItem(q: *Queue, comp: *Compilation) void {
+    {
+        q.mutex.lock();
+        defer q.mutex.unlock();
+        q.prelink_wait_count -= 1;
+        if (q.prelink_wait_count != 0) return;
+        // The prelink task count dropped to 0; restart the linker thread if necessary.
+        switch (q.state) {
+            .wait_for_mir => unreachable, // we've not started zcu tasks yet
+            .running => return,
+            .finished => {},
+        }
+        assert(q.queued_prelink.items.len == 0);
+        // Even if there are no ZCU tasks, we must restart the linker thread to make sure
+        // that `link.File.prelink()` is called.
+        q.state = .running;
+    }
+    comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
+}
+
 /// Called by codegen workers after they have populated a `ZcuTask.LinkFunc.SharedMir`. If the link
 /// thread was waiting for this MIR, it can resume.
 pub fn mirReady(q: *Queue, comp: *Compilation, func_index: InternPool.Index, mir: *ZcuTask.LinkFunc.SharedMir) void {
@@ -130,14 +162,14 @@ pub fn mirReady(q: *Queue, comp: *Compilation, func_index: InternPool.Index, mir
     comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
 }
 
-/// Enqueues all prelink tasks in `tasks`. Asserts that they were expected, i.e. that `tasks.len` is
-/// less than or equal to `q.pending_prelink_tasks`. Also asserts that `tasks.len` is not 0.
+/// Enqueues all prelink tasks in `tasks`. Asserts that they were expected, i.e. that
+/// `prelink_wait_count` is not yet 0. Also asserts that `tasks.len` is not 0.
 pub fn enqueuePrelink(q: *Queue, comp: *Compilation, tasks: []const PrelinkTask) Allocator.Error!void {
     {
         q.mutex.lock();
         defer q.mutex.unlock();
+        assert(q.prelink_wait_count > 0);
         try q.queued_prelink.appendSlice(comp.gpa, tasks);
-        q.pending_prelink_tasks -= @intCast(tasks.len);
         switch (q.state) {
             .wait_for_mir => unreachable, // we've not started zcu tasks yet
             .running => return,
@@ -167,7 +199,7 @@ pub fn enqueueZcu(q: *Queue, comp: *Compilation, task: ZcuTask) Allocator.Error!
     try q.queued_zcu.append(comp.gpa, task);
     switch (q.state) {
         .running, .wait_for_mir => return,
-        .finished => if (q.pending_prelink_tasks != 0) return,
+        .finished => if (q.prelink_wait_count > 0) return,
     }
     // Restart the linker thread, unless it would immediately be blocked
     if (task == .link_func and task.link_func.mir.status.load(.acquire) == .pending) {
@@ -194,7 +226,7 @@ fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
         defer q.mutex.unlock();
         std.mem.swap(std.ArrayListUnmanaged(PrelinkTask), &q.queued_prelink, &q.wip_prelink);
         if (q.wip_prelink.items.len == 0) {
-            if (q.pending_prelink_tasks == 0) {
+            if (q.prelink_wait_count == 0) {
                 break :prelink; // prelink is done
             } else {
                 // We're expecting more prelink tasks so can't move on to ZCU tasks.
```
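
The new API implies a pairing discipline at the call sites, which live in `Compilation` and are outside this diff. A minimal threaded sketch of that discipline, written against a hypothetical `StubQueue` and `prelinkWorker` rather than the real `Queue` (the restart-the-linker logic is elided):

```zig
const std = @import("std");

// Hypothetical stub standing in for `link.Queue`; only the counter is modeled.
const StubQueue = struct {
    mutex: std.Thread.Mutex = .{},
    prelink_wait_count: u32 = 1, // the initial 1 that `start` resets

    fn startPrelinkItem(q: *StubQueue) void {
        q.mutex.lock();
        defer q.mutex.unlock();
        std.debug.assert(q.prelink_wait_count > 0);
        q.prelink_wait_count += 1;
    }

    fn finishPrelinkItem(q: *StubQueue) void {
        q.mutex.lock();
        defer q.mutex.unlock();
        q.prelink_wait_count -= 1;
        // The real code restarts the linker thread when this reaches 0.
    }
};

fn prelinkWorker(q: *StubQueue, should_fail: bool) void {
    defer q.finishPrelinkItem(); // decrements even on the error path
    if (should_fail) return; // a failure here no longer strands the counter
    // ... produce and enqueue prelink tasks ...
}

pub fn main() !void {
    var q: StubQueue = .{};
    var threads: [2]std.Thread = undefined;
    for (&threads, 0..) |*t, i| {
        q.startPrelinkItem(); // +1 per item, before the worker is spawned
        t.* = try std.Thread.spawn(.{}, prelinkWorker, .{ &q, i == 0 });
    }
    q.finishPrelinkItem(); // matches the initial 1: spawning is complete
    for (threads) |t| t.join();
    std.debug.assert(q.prelink_wait_count == 0);
}
```

Because every `startPrelinkItem` is balanced by a `defer`red `finishPrelinkItem`, the counter drains to 0 regardless of whether any individual worker succeeds, which is exactly the property the old `pending_prelink_tasks` accounting lacked.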
