From b0fe7eef54dcaab8f7f0c18be042bf1876274ca4 Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Mon, 15 Jul 2024 03:19:15 -0400
Subject: InternPool: fix various data structure invariants

---
 src/InternPool.zig | 133 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 92 insertions(+), 41 deletions(-)

diff --git a/src/InternPool.zig b/src/InternPool.zig
index ff748ebc62..c20009f194 100644
--- a/src/InternPool.zig
+++ b/src/InternPool.zig
@@ -939,8 +939,12 @@ const Shard = struct {
                 return @atomicLoad(Value, &entry.value, .acquire);
             }
             fn release(entry: *Entry, value: Value) void {
+                assert(value != .none);
                 @atomicStore(Value, &entry.value, value, .release);
             }
+            fn resetUnordered(entry: *Entry) void {
+                @atomicStore(Value, &entry.value, .none, .unordered);
+            }
         };
     };
 }
@@ -6583,35 +6587,55 @@ const GetOrPutKey = union(enum) {
     },
     fn put(gop: *GetOrPutKey) Index {
-        return gop.putAt(0);
-    }
-    fn putAt(gop: *GetOrPutKey, offset: u32) Index {
         switch (gop.*) {
             .existing => unreachable,
-            .new => |info| {
+            .new => |*info| {
                 const index = Index.Unwrapped.wrap(.{
                     .tid = info.tid,
-                    .index = info.ip.getLocal(info.tid).mutate.items.len - 1 - offset,
+                    .index = info.ip.getLocal(info.tid).mutate.items.len - 1,
                 }, info.ip);
-                info.shard.shared.map.entries[info.map_index].release(index);
+                gop.putTentative(index);
+                gop.putFinal(index);
+                return index;
+            },
+        }
+    }
+
+    fn putTentative(gop: *GetOrPutKey, index: Index) void {
+        assert(index != .none);
+        switch (gop.*) {
+            .existing => unreachable,
+            .new => |*info| gop.new.shard.shared.map.entries[info.map_index].release(index),
+        }
+    }
+
+    fn putFinal(gop: *GetOrPutKey, index: Index) void {
+        assert(index != .none);
+        switch (gop.*) {
+            .existing => unreachable,
+            .new => |info| {
+                assert(info.shard.shared.map.entries[info.map_index].value == index);
                 info.shard.mutate.map.len += 1;
                 info.shard.mutate.map.mutex.unlock();
                 gop.* = .{ .existing = index };
-                return index;
             },
         }
     }
-    fn assign(gop: *GetOrPutKey, new_gop: GetOrPutKey) void {
-        gop.deinit();
-        gop.* = new_gop;
+    fn cancel(gop: *GetOrPutKey) void {
+        switch (gop.*) {
+            .existing => {},
+            .new => |info| info.shard.mutate.map.mutex.unlock(),
+        }
+        gop.* = .{ .existing = undefined };
     }
     fn deinit(gop: *GetOrPutKey) void {
         switch (gop.*) {
             .existing => {},
-            .new => |info| info.shard.mutate.map.mutex.unlock(),
+            .new => |info| info.shard.shared.map.entries[info.map_index].resetUnordered(),
         }
+        gop.cancel();
         gop.* = undefined;
     }
 };
@@ -6620,6 +6644,15 @@ fn getOrPutKey(
     gpa: Allocator,
     tid: Zcu.PerThread.Id,
     key: Key,
+) Allocator.Error!GetOrPutKey {
+    return ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, key, 0);
+}
+fn getOrPutKeyEnsuringAdditionalCapacity(
+    ip: *InternPool,
+    gpa: Allocator,
+    tid: Zcu.PerThread.Id,
+    key: Key,
+    additional_capacity: u32,
 ) Allocator.Error!GetOrPutKey {
     const full_hash = key.hash64(ip);
     const hash: u32 = @truncate(full_hash >> 32);
@@ -6655,11 +6688,16 @@
         }
     }
     const map_header = map.header().*;
-    if (shard.mutate.map.len >= map_header.capacity * 3 / 5) {
+    const required = shard.mutate.map.len + additional_capacity;
+    if (required >= map_header.capacity * 3 / 5) {
         const arena_state = &ip.getLocal(tid).mutate.arena;
         var arena = arena_state.promote(gpa);
         defer arena_state.* = arena.state;
-        const new_map_capacity = map_header.capacity * 2;
+        var new_map_capacity = map_header.capacity;
+        while (true) {
+            new_map_capacity *= 2;
+            if (required < new_map_capacity * 3 / 5) break;
+        }
         const new_map_buf = try arena.allocator().alignedAlloc(
             u8,
             Map.alignment,
@@ -6728,10 +6766,11 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
             assert(ptr_type.sentinel == .none or ip.typeOf(ptr_type.sentinel) == ptr_type.child);

             if (ptr_type.flags.size == .Slice) {
+                gop.cancel();
                 var new_key = key;
                 new_key.ptr_type.flags.size = .Many;
                 const ptr_type_index = try ip.get(gpa, tid, new_key);
-                gop.assign(try ip.getOrPutKey(gpa, tid, key));
+                gop = try ip.getOrPutKey(gpa, tid, key);

                 try items.ensureUnusedCapacity(1);
                 items.appendAssumeCapacity(.{
@@ -6911,9 +6950,10 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
                 },
                 .anon_decl => |anon_decl| if (ptrsHaveSameAlignment(ip, ptr.ty, ptr_type, anon_decl.orig_ty)) item: {
                     if (ptr.ty != anon_decl.orig_ty) {
+                        gop.cancel();
                         var new_key = key;
                         new_key.ptr.base_addr.anon_decl.orig_ty = ptr.ty;
-                        gop.assign(try ip.getOrPutKey(gpa, tid, new_key));
+                        gop = try ip.getOrPutKey(gpa, tid, new_key);
                         if (gop == .existing) return gop.existing;
                     }
                     break :item .{
@@ -6984,11 +7024,12 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
                 },
                 else => unreachable,
             }
+            gop.cancel();
             const index_index = try ip.get(gpa, tid, .{ .int = .{
                 .ty = .usize_type,
                 .storage = .{ .u64 = base_index.index },
             } });
-            gop.assign(try ip.getOrPutKey(gpa, tid, key));
+            gop = try ip.getOrPutKey(gpa, tid, key);
             try items.ensureUnusedCapacity(1);
             items.appendAssumeCapacity(.{
                 .tag = switch (ptr.base_addr) {
@@ -7397,11 +7438,12 @@ pub fn get(ip: *InternPool, gpa: Allocator, tid: Zcu.PerThread.Id, key: Key) All
             }
             const elem = switch (aggregate.storage) {
                 .bytes => |bytes| elem: {
+                    gop.cancel();
                     const elem = try ip.get(gpa, tid, .{ .int = .{
                         .ty = .u8_type,
                         .storage = .{ .u64 = bytes.at(0, ip) },
                     } });
-                    gop.assign(try ip.getOrPutKey(gpa, tid, key));
+                    gop = try ip.getOrPutKey(gpa, tid, key);
                     try items.ensureUnusedCapacity(1);
                     break :elem elem;
                 },
@@ -8219,9 +8261,9 @@ pub fn getFuncDeclIes(
         extra.mutate.len = prev_extra_len;
     }

-    var func_gop = try ip.getOrPutKey(gpa, tid, .{
+    var func_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
         .func = extraFuncDecl(tid, extra.list.*, func_decl_extra_index),
-    });
+    }, 3);
     defer func_gop.deinit();
     if (func_gop == .existing) {
         // An existing function type was found; undo the additions to our two arrays.
@@ -8229,23 +8271,28 @@ pub fn getFuncDeclIes(
         extra.mutate.len = prev_extra_len;
         return func_gop.existing;
     }
-    var error_union_type_gop = try ip.getOrPutKey(gpa, tid, .{ .error_union_type = .{
+    func_gop.putTentative(func_index);
+    var error_union_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{ .error_union_type = .{
         .error_set_type = error_set_type,
         .payload_type = key.bare_return_type,
-    } });
+    } }, 2);
     defer error_union_type_gop.deinit();
-    var error_set_type_gop = try ip.getOrPutKey(gpa, tid, .{
+    error_union_type_gop.putTentative(error_union_type);
+    var error_set_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
         .inferred_error_set_type = func_index,
-    });
+    }, 1);
     defer error_set_type_gop.deinit();
+    error_set_type_gop.putTentative(error_set_type);
     var func_ty_gop = try ip.getOrPutKey(gpa, tid, .{
         .func_type = extraFuncType(tid, extra.list.*, func_type_extra_index),
     });
     defer func_ty_gop.deinit();
-    assert(func_gop.putAt(3) == func_index);
-    assert(error_union_type_gop.putAt(2) == error_union_type);
-    assert(error_set_type_gop.putAt(1) == error_set_type);
-    assert(func_ty_gop.putAt(0) == func_ty);
+    func_ty_gop.putTentative(func_ty);
+
+    func_gop.putFinal(func_index);
+    error_union_type_gop.putFinal(error_union_type);
+    error_set_type_gop.putFinal(error_set_type);
+    func_ty_gop.putFinal(func_ty);

     return func_index;
 }
@@ -8504,9 +8551,9 @@ pub fn getFuncInstanceIes(
         extra.mutate.len = prev_extra_len;
     }

-    var func_gop = try ip.getOrPutKey(gpa, tid, .{
+    var func_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
         .func = ip.extraFuncInstance(tid, extra.list.*, func_extra_index),
-    });
+    }, 3);
     defer func_gop.deinit();
     if (func_gop == .existing) {
         // Hot path: undo the additions to our two arrays.
@@ -8514,19 +8561,23 @@ pub fn getFuncInstanceIes(
         extra.mutate.len = prev_extra_len;
         return func_gop.existing;
     }
-    var error_union_type_gop = try ip.getOrPutKey(gpa, tid, .{ .error_union_type = .{
+    func_gop.putTentative(func_index);
+    var error_union_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{ .error_union_type = .{
         .error_set_type = error_set_type,
         .payload_type = arg.bare_return_type,
-    } });
+    } }, 2);
     defer error_union_type_gop.deinit();
-    var error_set_type_gop = try ip.getOrPutKey(gpa, tid, .{
+    error_union_type_gop.putTentative(error_union_type);
+    var error_set_type_gop = try ip.getOrPutKeyEnsuringAdditionalCapacity(gpa, tid, .{
         .inferred_error_set_type = func_index,
-    });
+    }, 1);
     defer error_set_type_gop.deinit();
+    error_set_type_gop.putTentative(error_set_type);
     var func_ty_gop = try ip.getOrPutKey(gpa, tid, .{
         .func_type = extraFuncType(tid, extra.list.*, func_type_extra_index),
     });
     defer func_ty_gop.deinit();
+    func_ty_gop.putTentative(func_ty);
     try finishFuncInstance(
         ip,
         gpa,
@@ -8538,10 +8589,11 @@
         arg.alignment,
         arg.section,
     );
-    assert(func_gop.putAt(3) == func_index);
-    assert(error_union_type_gop.putAt(2) == error_union_type);
-    assert(error_set_type_gop.putAt(1) == error_set_type);
-    assert(func_ty_gop.putAt(0) == func_ty);
+
+    func_gop.putFinal(func_index);
+    error_union_type_gop.putFinal(error_union_type);
+    error_set_type_gop.putFinal(error_set_type);
+    func_ty_gop.putFinal(func_ty);

     return func_index;
 }
@@ -10837,19 +10889,18 @@ pub fn getBackingDecl(ip: *const InternPool, val: Index) OptionalDeclIndex {
     while (true) {
         const unwrapped_base = base.unwrap(ip);
         const base_item = unwrapped_base.getItem(ip);
-        const base_extra_items = unwrapped_base.getExtra(ip).view().items(.@"0");
         switch (base_item.tag) {
-            .ptr_decl => return @enumFromInt(base_extra_items[
+            .ptr_decl => return @enumFromInt(unwrapped_base.getExtra(ip).view().items(.@"0")[
                 base_item.data + std.meta.fieldIndex(PtrDecl, "decl").?
             ]),
             inline .ptr_eu_payload,
             .ptr_opt_payload,
             .ptr_elem,
             .ptr_field,
-            => |tag| base = @enumFromInt(base_extra_items[
+            => |tag| base = @enumFromInt(unwrapped_base.getExtra(ip).view().items(.@"0")[
                 base_item.data + std.meta.fieldIndex(tag.Payload(), "base").?
             ]),
-            .ptr_slice => base = @enumFromInt(base_extra_items[
+            .ptr_slice => base = @enumFromInt(unwrapped_base.getExtra(ip).view().items(.@"0")[
                 base_item.data + std.meta.fieldIndex(PtrSlice, "ptr").?
             ]),
             else => return .none,
--
cgit v1.2.3
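The heart of the first patch is the split of the old offset-based `putAt` into an explicit two-phase protocol: `putTentative` publishes an index into the shard map with release ordering, so the mutually referential entries built by `getFuncDeclIes`/`getFuncInstanceIes` (a function, its inferred error set, and the types built from them) can already resolve one another, and `putFinal` commits the entry by bumping the shard length before dropping the lock, while the reworked `deinit` now erases the tentative slot before unlocking on the error path. Below is a minimal self-contained sketch of that publish/commit/cancel shape; the `Entry`/`Shard` layout, the `u32` value type with `0` standing in for `.none`, and the fixed-size table are simplifications for illustration, not the InternPool's real types:

```zig
const std = @import("std");

/// Illustrative stand-in for a shard map slot; not the real InternPool.Shard.
const Entry = struct {
    value: u32 = 0, // 0 plays the role of `.none`

    fn release(entry: *Entry, value: u32) void {
        std.debug.assert(value != 0);
        // Release ordering: a reader that acquire-loads this slot also sees
        // the item data written before this store.
        @atomicStore(u32, &entry.value, value, .release);
    }

    fn resetUnordered(entry: *Entry) void {
        // Only called while the shard mutex is held, so no ordering is needed.
        @atomicStore(u32, &entry.value, 0, .unordered);
    }
};

const Shard = struct {
    mutex: std.Thread.Mutex = .{},
    len: u32 = 0,
    entries: [8]Entry = [1]Entry{.{}} ** 8,

    // Phase 1: make the index visible so later lookups in the same batch
    // (e.g. an inferred error set referring back to its function) succeed.
    fn putTentative(shard: *Shard, slot: usize, index: u32) void {
        shard.entries[slot].release(index);
    }

    // Phase 2: commit the entry and release the lock.
    fn putFinal(shard: *Shard, slot: usize, index: u32) void {
        std.debug.assert(shard.entries[slot].value == index);
        shard.len += 1;
        shard.mutex.unlock();
    }

    // Error path: undo the tentative store before unlocking.
    fn cancel(shard: *Shard, slot: usize) void {
        shard.entries[slot].resetUnordered();
        shard.mutex.unlock();
    }
};

test "tentative puts become visible before the final commit" {
    var shard: Shard = .{};
    shard.mutex.lock();
    shard.putTentative(3, 42);
    try std.testing.expectEqual(@as(u32, 42), shard.entries[3].value);
    shard.putFinal(3, 42);
    try std.testing.expectEqual(@as(u32, 1), shard.len);
}
```

The important invariant is the one `putFinal` asserts: between the tentative store and the commit the slot already holds the final index, so a concurrent reader never observes a half-initialized entry.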
From 9d1820d20635ade6cb2dbfc36744353715653670 Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Tue, 16 Jul 2024 06:27:56 -0400
Subject: InternPool: reduce max tid width by one bit

@mlugg keeps stealing my bits!
---
 src/InternPool.zig    | 11 +++++++----
 src/Zcu/PerThread.zig |  3 ++-
 src/main.zig          |  6 +++---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/InternPool.zig b/src/InternPool.zig
index c20009f194..cfd6ecbfb0 100644
--- a/src/InternPool.zig
+++ b/src/InternPool.zig
@@ -10,6 +10,8 @@ shards: []Shard = &.{},
 global_error_set: GlobalErrorSet = GlobalErrorSet.empty,
 /// Cached number of active bits in a `tid`.
 tid_width: if (single_threaded) u0 else std.math.Log2Int(u32) = 0,
+/// Cached shift amount to put a `tid` in the top bits of a 30-bit value.
+tid_shift_30: if (single_threaded) u0 else std.math.Log2Int(u32) = if (single_threaded) 0 else 31,
 /// Cached shift amount to put a `tid` in the top bits of a 31-bit value.
 tid_shift_31: if (single_threaded) u0 else std.math.Log2Int(u32) = if (single_threaded) 0 else 31,
 /// Cached shift amount to put a `tid` in the top bits of a 32-bit value.
@@ -4091,8 +4093,8 @@ pub const Index = enum(u32) {
         fn wrap(unwrapped: Unwrapped, ip: *const InternPool) Index {
             assert(@intFromEnum(unwrapped.tid) <= ip.getTidMask());
-            assert(unwrapped.index <= ip.getIndexMask(u31));
-            return @enumFromInt(@as(u32, @intFromEnum(unwrapped.tid)) << ip.tid_shift_31 | unwrapped.index);
+            assert(unwrapped.index <= ip.getIndexMask(u30));
+            return @enumFromInt(@as(u32, @intFromEnum(unwrapped.tid)) << ip.tid_shift_30 | unwrapped.index);
         }

         pub fn getExtra(unwrapped: Unwrapped, ip: *const InternPool) Local.Extra {
@@ -4131,8 +4133,8 @@
             .tid = .main,
             .index = @intFromEnum(index),
         } else .{
-            .tid = @enumFromInt(@intFromEnum(index) >> ip.tid_shift_31 & ip.getTidMask()),
-            .index = @intFromEnum(index) & ip.getIndexMask(u31),
+            .tid = @enumFromInt(@intFromEnum(index) >> ip.tid_shift_30 & ip.getTidMask()),
+            .index = @intFromEnum(index) & ip.getIndexMask(u30),
         };
     }
@@ -5822,6 +5824,7 @@ pub fn init(ip: *InternPool, gpa: Allocator, available_threads: usize) !void {
     });
     ip.tid_width = @intCast(std.math.log2_int_ceil(usize, used_threads));
+    ip.tid_shift_30 = if (single_threaded) 0 else 30 - ip.tid_width;
     ip.tid_shift_31 = if (single_threaded) 0 else 31 - ip.tid_width;
     ip.tid_shift_32 = if (single_threaded) 0 else ip.tid_shift_31 +| 1;
     ip.shards = try gpa.alloc(Shard, @as(usize, 1) << ip.tid_width);
diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig
index c3f569cc7d..ce6b1cb7a9 100644
--- a/src/Zcu/PerThread.zig
+++ b/src/Zcu/PerThread.zig
@@ -3,7 +3,8 @@ zcu: *Zcu,
 /// Dense, per-thread unique index.
 tid: Id,

-pub const Id = if (InternPool.single_threaded) enum { main } else enum(u8) { main, _ };
+pub const IdBacking = u7;
+pub const Id = if (InternPool.single_threaded) enum { main } else enum(IdBacking) { main, _ };

 pub fn destroyDecl(pt: Zcu.PerThread, decl_index: Zcu.Decl.Index) void {
     const zcu = pt.zcu;
diff --git a/src/main.zig b/src/main.zig
index 2b36c31865..15e3a212a1 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -3110,7 +3110,7 @@ fn buildOutputType(
     var thread_pool: ThreadPool = undefined;
     try thread_pool.init(.{
         .allocator = gpa,
-        .n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(u8)),
+        .n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
         .track_ids = true,
     });
     defer thread_pool.deinit();
@@ -4961,7 +4961,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void {
     var thread_pool: ThreadPool = undefined;
     try thread_pool.init(.{
         .allocator = gpa,
-        .n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(u8)),
+        .n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
         .track_ids = true,
     });
     defer thread_pool.deinit();
@@ -5399,7 +5399,7 @@ fn jitCmd(
     var thread_pool: ThreadPool = undefined;
    try thread_pool.init(.{
         .allocator = gpa,
-        .n_jobs = @min(@max(std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(u8)),
+        .n_jobs = @min(@max(std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)),
         .track_ids = true,
     });
     defer thread_pool.deinit();
--
cgit v1.2.3
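The second patch's arithmetic: a packed `Index` is a `u32` whose top `tid_width` bits hold the thread id and whose remaining low bits hold the per-thread item index, so shrinking the index payload from 31 to 30 bits (and the tid backing integer to `u7`) frees one tag bit elsewhere. A rough model of the shift/mask bookkeeping that `init` caches in `tid_shift_30` (the `Packing` type and its names are invented for illustration; the real fields live on `InternPool` itself):

```zig
const std = @import("std");

/// Illustrative model of the tid packing; not the real InternPool layout.
const Packing = struct {
    tid_width: std.math.Log2Int(u32),
    tid_shift_30: std.math.Log2Int(u32),

    fn init(max_threads: usize) Packing {
        const tid_width: std.math.Log2Int(u32) =
            @intCast(std.math.log2_int_ceil(usize, max_threads));
        return .{
            .tid_width = tid_width,
            // The tid occupies the top `tid_width` bits of a 30-bit value.
            .tid_shift_30 = @intCast(30 - @as(u32, tid_width)),
        };
    }

    fn indexMask(p: Packing) u32 {
        // Low bits left over for the per-thread index.
        return (@as(u32, 1) << p.tid_shift_30) - 1;
    }

    fn pack(p: Packing, tid: u32, index: u32) u32 {
        std.debug.assert(index <= p.indexMask());
        return tid << p.tid_shift_30 | index;
    }

    fn unpackTid(p: Packing, packed_val: u32) u32 {
        return packed_val >> p.tid_shift_30 & ((@as(u32, 1) << p.tid_width) - 1);
    }
};

test "tid round-trips through the top bits of a 30-bit value" {
    const p = Packing.init(16); // tid_width = 4, tid_shift_30 = 26
    const val = p.pack(5, 12345);
    try std.testing.expectEqual(@as(u32, 5), p.unpackTid(val));
    try std.testing.expectEqual(@as(u32, 12345), val & p.indexMask());
}
```

With 16 worker threads, `tid_width` is 4 and the shift is 26, leaving about 67 million interned items per thread before the index mask overflows.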
From 00fdbf05f39931d1f6c5808e8da4afca85357214 Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Sun, 14 Jul 2024 23:14:06 -0400
Subject: InternPool: enable separate codegen/linking thread

Let's see what happens :)
---
 src/InternPool.zig | 2 +-
 src/target.zig     | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/InternPool.zig b/src/InternPool.zig
index cfd6ecbfb0..5379d602a4 100644
--- a/src/InternPool.zig
+++ b/src/InternPool.zig
@@ -55,7 +55,7 @@ free_dep_entries: std.ArrayListUnmanaged(DepEntry.Index) = .{},
 /// Whether a multi-threaded intern pool is useful.
 /// Currently `false` until the intern pool is actually accessed
 /// from multiple threads to reduce the cost of this data structure.
-const want_multi_threaded = false;
+const want_multi_threaded = true;

 /// Whether a single-threaded intern pool impl is in use.
 pub const single_threaded = builtin.single_threaded or !want_multi_threaded;
diff --git a/src/target.zig b/src/target.zig
index 2accc100b8..b80a4977c3 100644
--- a/src/target.zig
+++ b/src/target.zig
@@ -572,7 +572,8 @@ pub inline fn backendSupportsFeature(backend: std.builtin.CompilerBackend, compt
             else => false,
         },
         .separate_thread => switch (backend) {
-            else => false,
+            .stage2_llvm => false,
+            else => true,
         },
     };
 }
--
cgit v1.2.3
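Flipping `want_multi_threaded` to `true` is what makes the earlier capacity work load-bearing: once several threads intern concurrently, a batch such as `getFuncDeclIes` must be able to tentatively publish its four entries without the shard map rehashing mid-batch, which is why `getOrPutKeyEnsuringAdditionalCapacity` grows the map up front until `len + additional_capacity` fits under the 3/5 load factor. A small sketch of just that growth rule (the function name is invented; the surrounding map machinery is elided):

```zig
const std = @import("std");

/// Doubles `capacity` until `len + additional` stays under the 3/5 load
/// factor used by the InternPool shard maps.
fn grownCapacity(capacity: u32, len: u32, additional: u32) u32 {
    const required = len + additional;
    var new_capacity = capacity;
    while (required >= new_capacity * 3 / 5) new_capacity *= 2;
    return new_capacity;
}

test "capacity doubles past the load factor" {
    // 32 slots hold at most 19 entries at a 3/5 load factor
    // (len >= capacity * 3 / 5 triggers growth), so reserving 4 more
    // entries on top of 18 forces one doubling.
    try std.testing.expectEqual(@as(u32, 64), grownCapacity(32, 18, 4));
    // A large reservation may need several doublings at once, which is
    // why the patch replaced the single `* 2` with a loop.
    try std.testing.expectEqual(@as(u32, 256), grownCapacity(32, 18, 100));
}
```

Doubling in a loop rather than once matters only when the reservation is large relative to the current capacity, but the loop makes the invariant unconditional.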