author    mlugg <mlugg@mlugg.co.uk>    2025-08-05 22:48:50 +0100
committer Matthew Lugg <mlugg@mlugg.co.uk>    2025-08-06 21:50:16 +0100
commit    e17a050bc695f7d117b89adb1d258813593ca111 (patch)
tree      f281876485848c783fb9e2a10616b98cda0b2e09 /src/link/Queue.zig
parent    3de8bbd3d4e262df11a582fb52401b8077b5f352 (diff)
link: prevent deadlock when prelink tasks fail
If an error occurred which prevented a prelink task from being queued, then `pending_prelink_tasks` would never be decremented, which could cause deadlocks in some cases. So, instead of calculating ahead of time the number of prelink tasks to expect, we use a simpler strategy much like a wait group: we add 1 to a counter when we spawn a worker, and the worker function `defer`s the decrement. The counter starts at 1, and there is a final decrement after all of the workers have been spawned, so once it hits 0, prelink is done (be it with a failure or a success).
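As a rough illustration of that counting scheme, here is a minimal standalone sketch in Zig. The names `startItem`, `finishItem`, and `worker` are made up for the example and are not the code in this patch; the real implementation uses `startPrelinkItem`/`finishPrelinkItem` on the `Queue` and the compiler's thread pool, as shown in the diff below.

const std = @import("std");

var mutex: std.Thread.Mutex = .{};
// Starts at 1 so the count cannot reach 0 before all workers have been spawned.
var wait_count: u32 = 1;

fn startItem() void {
    mutex.lock();
    defer mutex.unlock();
    wait_count += 1;
}

fn finishItem() void {
    mutex.lock();
    defer mutex.unlock();
    wait_count -= 1;
    if (wait_count == 0) {
        std.debug.print("all prelink work finished; ZCU tasks may run\n", .{});
    }
}

fn worker(id: usize) void {
    // The deferred decrement runs even if the work below fails early,
    // which is exactly what prevents the deadlock described above.
    defer finishItem();
    std.debug.print("worker {d} queued its prelink tasks\n", .{id});
}

pub fn main() !void {
    var threads: [3]std.Thread = undefined;
    for (&threads, 0..) |*t, i| {
        startItem(); // paired with the worker's deferred finishItem()
        t.* = try std.Thread.spawn(.{}, worker, .{i});
    }
    finishItem(); // drop the initial 1: no more workers will be spawned
    for (threads) |t| t.join();
}

In the patch itself, `startPrelinkItem` and `finishPrelinkItem` play the roles of the increment and decrement, and the final decrement additionally restarts the linker thread once `prelink_wait_count` reaches 0.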
Diffstat (limited to 'src/link/Queue.zig')
-rw-r--r--   src/link/Queue.zig   58
1 file changed, 45 insertions(+), 13 deletions(-)
diff --git a/src/link/Queue.zig b/src/link/Queue.zig
index d1595636ac..9f4535e1fe 100644
--- a/src/link/Queue.zig
+++ b/src/link/Queue.zig
@@ -16,9 +16,9 @@ mutex: std.Thread.Mutex,
/// Validates that only one `flushTaskQueue` thread is running at a time.
flush_safety: std.debug.SafetyLock,
-/// This is the number of prelink tasks which are expected but have not yet been enqueued.
-/// Guarded by `mutex`.
-pending_prelink_tasks: u32,
+/// This value is positive while there are still prelink tasks yet to be queued. Once they are
+/// all queued, this value becomes 0, and ZCU tasks can be run. Guarded by `mutex`.
+prelink_wait_count: u32,
/// Prelink tasks which have been enqueued and are not yet owned by the worker thread.
/// Allocated into `gpa`, guarded by `mutex`.
@@ -59,7 +59,7 @@ state: union(enum) {
/// The link thread is currently running or queued to run.
running,
/// The link thread is not running or queued, because it has exhausted all immediately available
- /// tasks. It should be spawned when more tasks are enqueued. If `pending_prelink_tasks` is not
+ /// tasks. It should be spawned when more tasks are enqueued. If `prelink_wait_count` is not
/// zero, we are specifically waiting for prelink tasks.
finished,
/// The link thread is not running or queued, because it is waiting for this MIR to be populated.
@@ -73,11 +73,11 @@ state: union(enum) {
const max_air_bytes_in_flight = 10 * 1024 * 1024;
/// The initial `Queue` state, containing no tasks, expecting no prelink tasks, and with no running worker thread.
-/// The `pending_prelink_tasks` and `queued_prelink` fields may be modified as needed before calling `start`.
+/// The `queued_prelink` field may be appended to before calling `start`.
pub const empty: Queue = .{
.mutex = .{},
.flush_safety = .{},
- .pending_prelink_tasks = 0,
+ .prelink_wait_count = undefined, // set in `start`
.queued_prelink = .empty,
.wip_prelink = .empty,
.queued_zcu = .empty,
@@ -100,17 +100,49 @@ pub fn deinit(q: *Queue, comp: *Compilation) void {
}
/// This is expected to be called exactly once, after which the caller must not directly access
-/// `queued_prelink` or `pending_prelink_tasks` any longer. This will spawn the link thread if
-/// necessary.
+/// `queued_prelink` any longer. This will spawn the link thread if necessary.
pub fn start(q: *Queue, comp: *Compilation) void {
assert(q.state == .finished);
assert(q.queued_zcu.items.len == 0);
+ // Reset this to 1. We can't init it to 1 in `empty`, because it would fall to 0 on successive
+ // incremental updates, but we still need the initial 1.
+ q.prelink_wait_count = 1;
if (q.queued_prelink.items.len != 0) {
q.state = .running;
comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
}
}
+/// Every call to this must be paired with a call to `finishPrelinkItem`.
+pub fn startPrelinkItem(q: *Queue) void {
+ q.mutex.lock();
+ defer q.mutex.unlock();
+ assert(q.prelink_wait_count > 0); // must not have finished everything already
+ q.prelink_wait_count += 1;
+}
+/// This function must be called exactly one more time than `startPrelinkItem` is. The final call
+/// indicates that we have finished calling `startPrelinkItem`, so once all pending items finish,
+/// we are ready to move on to ZCU tasks.
+pub fn finishPrelinkItem(q: *Queue, comp: *Compilation) void {
+ {
+ q.mutex.lock();
+ defer q.mutex.unlock();
+ q.prelink_wait_count -= 1;
+ if (q.prelink_wait_count != 0) return;
+ // The prelink task count dropped to 0; restart the linker thread if necessary.
+ switch (q.state) {
+ .wait_for_mir => unreachable, // we've not started zcu tasks yet
+ .running => return,
+ .finished => {},
+ }
+ assert(q.queued_prelink.items.len == 0);
+ // Even if there are no ZCU tasks, we must restart the linker thread to make sure
+ // that `link.File.prelink()` is called.
+ q.state = .running;
+ }
+ comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
+}
+
/// Called by codegen workers after they have populated a `ZcuTask.LinkFunc.SharedMir`. If the link
/// thread was waiting for this MIR, it can resume.
pub fn mirReady(q: *Queue, comp: *Compilation, func_index: InternPool.Index, mir: *ZcuTask.LinkFunc.SharedMir) void {
@@ -130,14 +162,14 @@ pub fn mirReady(q: *Queue, comp: *Compilation, func_index: InternPool.Index, mir
comp.thread_pool.spawnWgId(&comp.link_task_wait_group, flushTaskQueue, .{ q, comp });
}
-/// Enqueues all prelink tasks in `tasks`. Asserts that they were expected, i.e. that `tasks.len` is
-/// less than or equal to `q.pending_prelink_tasks`. Also asserts that `tasks.len` is not 0.
+/// Enqueues all prelink tasks in `tasks`. Asserts that they were expected, i.e. that
+/// `prelink_wait_count` is not yet 0. Also asserts that `tasks.len` is not 0.
pub fn enqueuePrelink(q: *Queue, comp: *Compilation, tasks: []const PrelinkTask) Allocator.Error!void {
{
q.mutex.lock();
defer q.mutex.unlock();
+ assert(q.prelink_wait_count > 0);
try q.queued_prelink.appendSlice(comp.gpa, tasks);
- q.pending_prelink_tasks -= @intCast(tasks.len);
switch (q.state) {
.wait_for_mir => unreachable, // we've not started zcu tasks yet
.running => return,
@@ -167,7 +199,7 @@ pub fn enqueueZcu(q: *Queue, comp: *Compilation, task: ZcuTask) Allocator.Error!
try q.queued_zcu.append(comp.gpa, task);
switch (q.state) {
.running, .wait_for_mir => return,
- .finished => if (q.pending_prelink_tasks != 0) return,
+ .finished => if (q.prelink_wait_count > 0) return,
}
// Restart the linker thread, unless it would immediately be blocked
if (task == .link_func and task.link_func.mir.status.load(.acquire) == .pending) {
@@ -194,7 +226,7 @@ fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
defer q.mutex.unlock();
std.mem.swap(std.ArrayListUnmanaged(PrelinkTask), &q.queued_prelink, &q.wip_prelink);
if (q.wip_prelink.items.len == 0) {
- if (q.pending_prelink_tasks == 0) {
+ if (q.prelink_wait_count == 0) {
break :prelink; // prelink is done
} else {
// We're expecting more prelink tasks so can't move on to ZCU tasks.