diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2021-06-04 01:12:38 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-06-04 01:12:38 -0400 |
| commit | 7d15a3ac71c5d8dc8c08dfd8ea8ad43d4eae188a (patch) | |
| tree | ae007106526e300bb7143be003fe8d847ba7230c /lib/std | |
| parent | 87dae0ce98fde1957a9290c22866b3101ce419d8 (diff) | |
| parent | 6953c8544b68c788dca4ed065e4a15eccbd4446b (diff) | |
| download | zig-7d15a3ac71c5d8dc8c08dfd8ea8ad43d4eae188a.tar.gz zig-7d15a3ac71c5d8dc8c08dfd8ea8ad43d4eae188a.zip | |
Merge pull request #8975 from SpexGuy/hash-map-updates
Breaking hash map changes for 0.8.0
Diffstat (limited to 'lib/std')
| -rw-r--r-- | lib/std/array_hash_map.zig | 1786 | ||||
| -rw-r--r-- | lib/std/buf_map.zig | 67 | ||||
| -rw-r--r-- | lib/std/buf_set.zig | 59 | ||||
| -rw-r--r-- | lib/std/build.zig | 42 | ||||
| -rw-r--r-- | lib/std/build/run.zig | 10 | ||||
| -rw-r--r-- | lib/std/child_process.zig | 24 | ||||
| -rw-r--r-- | lib/std/fs/watch.zig | 114 | ||||
| -rw-r--r-- | lib/std/hash_map.zig | 1097 | ||||
| -rw-r--r-- | lib/std/heap/general_purpose_allocator.zig | 20 | ||||
| -rw-r--r-- | lib/std/json.zig | 4 | ||||
| -rw-r--r-- | lib/std/math.zig | 51 | ||||
| -rw-r--r-- | lib/std/multi_array_list.zig | 160 | ||||
| -rw-r--r-- | lib/std/process.zig | 8 |
13 files changed, 2534 insertions, 908 deletions
diff --git a/lib/std/array_hash_map.zig b/lib/std/array_hash_map.zig index 831b21fe19..7acc65c66b 100644 --- a/lib/std/array_hash_map.zig +++ b/lib/std/array_hash_map.zig @@ -17,23 +17,36 @@ const Allocator = mem.Allocator; const builtin = std.builtin; const hash_map = @This(); +/// An ArrayHashMap with default hash and equal functions. +/// See AutoContext for a description of the hash and equal implementations. pub fn AutoArrayHashMap(comptime K: type, comptime V: type) type { - return ArrayHashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), !autoEqlIsCheap(K)); + return ArrayHashMap(K, V, AutoContext(K), !autoEqlIsCheap(K)); } +/// An ArrayHashMapUnmanaged with default hash and equal functions. +/// See AutoContext for a description of the hash and equal implementations. pub fn AutoArrayHashMapUnmanaged(comptime K: type, comptime V: type) type { - return ArrayHashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), !autoEqlIsCheap(K)); + return ArrayHashMapUnmanaged(K, V, AutoContext(K), !autoEqlIsCheap(K)); } /// Builtin hashmap for strings as keys. pub fn StringArrayHashMap(comptime V: type) type { - return ArrayHashMap([]const u8, V, hashString, eqlString, true); + return ArrayHashMap([]const u8, V, StringContext, true); } pub fn StringArrayHashMapUnmanaged(comptime V: type) type { - return ArrayHashMapUnmanaged([]const u8, V, hashString, eqlString, true); + return ArrayHashMapUnmanaged([]const u8, V, StringContext, true); } +pub const StringContext = struct { + pub fn hash(self: @This(), s: []const u8) u32 { + return hashString(s); + } + pub fn eql(self: @This(), a: []const u8, b: []const u8) bool { + return eqlString(a, b); + } +}; + pub fn eqlString(a: []const u8, b: []const u8) bool { return mem.eql(u8, a, b); } @@ -54,83 +67,112 @@ pub fn hashString(s: []const u8) u32 { /// but only has to call `eql` for hash collisions. /// If typical operations (except iteration over entries) need to be faster, prefer /// the alternative `std.HashMap`. +/// Context must be a struct type with two member functions: +/// hash(self, K) u32 +/// eql(self, K, K) bool +/// Adapted variants of many functions are provided. These variants +/// take a pseudo key instead of a key. Their context must have the functions: +/// hash(self, PseudoKey) u32 +/// eql(self, PseudoKey, K) bool pub fn ArrayHashMap( comptime K: type, comptime V: type, - comptime hash: fn (key: K) u32, - comptime eql: fn (a: K, b: K) bool, + comptime Context: type, comptime store_hash: bool, ) type { + comptime std.hash_map.verifyContext(Context, K, K, u32); return struct { unmanaged: Unmanaged, allocator: *Allocator, + ctx: Context, + + /// The ArrayHashMapUnmanaged type using the same settings as this managed map. + pub const Unmanaged = ArrayHashMapUnmanaged(K, V, Context, store_hash); - pub const Unmanaged = ArrayHashMapUnmanaged(K, V, hash, eql, store_hash); + /// Pointers to a key and value in the backing store of this map. + /// Modifying the key is allowed only if it does not change the hash. + /// Modifying the value is allowed. + /// Entry pointers become invalid whenever this ArrayHashMap is modified, + /// unless `ensureCapacity` was previously used. pub const Entry = Unmanaged.Entry; - pub const Hash = Unmanaged.Hash; - pub const GetOrPutResult = Unmanaged.GetOrPutResult; - /// Deprecated. Iterate using `items`. - pub const Iterator = struct { - hm: *const Self, - /// Iterator through the entry array. - index: usize, + /// A KV pair which has been copied out of the backing store + pub const KV = Unmanaged.KV; - pub fn next(it: *Iterator) ?*Entry { - if (it.index >= it.hm.unmanaged.entries.items.len) return null; - const result = &it.hm.unmanaged.entries.items[it.index]; - it.index += 1; - return result; - } + /// The Data type used for the MultiArrayList backing this map + pub const Data = Unmanaged.Data; + /// The MultiArrayList type backing this map + pub const DataList = Unmanaged.DataList; - /// Reset the iterator to the initial index - pub fn reset(it: *Iterator) void { - it.index = 0; - } - }; + /// The stored hash type, either u32 or void. + pub const Hash = Unmanaged.Hash; + + /// getOrPut variants return this structure, with pointers + /// to the backing store and a flag to indicate whether an + /// existing entry was found. + /// Modifying the key is allowed only if it does not change the hash. + /// Modifying the value is allowed. + /// Entry pointers become invalid whenever this ArrayHashMap is modified, + /// unless `ensureCapacity` was previously used. + pub const GetOrPutResult = Unmanaged.GetOrPutResult; + + /// An Iterator over Entry pointers. + pub const Iterator = Unmanaged.Iterator; const Self = @This(); - const Index = Unmanaged.Index; + /// Create an ArrayHashMap instance which will use a specified allocator. pub fn init(allocator: *Allocator) Self { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call initContext instead."); + return initContext(allocator, undefined); + } + pub fn initContext(allocator: *Allocator, ctx: Context) Self { return .{ .unmanaged = .{}, .allocator = allocator, + .ctx = ctx, }; } - /// `ArrayHashMap` takes ownership of the passed in array list. The array list must have - /// been allocated with `allocator`. - /// Deinitialize with `deinit`. - pub fn fromOwnedArrayList(allocator: *Allocator, entries: std.ArrayListUnmanaged(Entry)) !Self { - return Self{ - .unmanaged = try Unmanaged.fromOwnedArrayList(allocator, entries), - .allocator = allocator, - }; - } - + /// Frees the backing allocation and leaves the map in an undefined state. + /// Note that this does not free keys or values. You must take care of that + /// before calling this function, if it is needed. pub fn deinit(self: *Self) void { self.unmanaged.deinit(self.allocator); self.* = undefined; } + /// Clears the map but retains the backing allocation for future use. pub fn clearRetainingCapacity(self: *Self) void { return self.unmanaged.clearRetainingCapacity(); } + /// Clears the map and releases the backing allocation pub fn clearAndFree(self: *Self) void { return self.unmanaged.clearAndFree(self.allocator); } + /// Returns the number of KV pairs stored in this map. pub fn count(self: Self) usize { return self.unmanaged.count(); } + /// Returns the backing array of keys in this map. + /// Modifying the map may invalidate this array. + pub fn keys(self: Self) []K { + return self.unmanaged.keys(); + } + /// Returns the backing array of values in this map. + /// Modifying the map may invalidate this array. + pub fn values(self: Self) []V { + return self.unmanaged.values(); + } + + /// Returns an iterator over the pairs in this map. + /// Modifying the map may invalidate this iterator. pub fn iterator(self: *const Self) Iterator { - return Iterator{ - .hm = self, - .index = 0, - }; + return self.unmanaged.iterator(); } /// If key exists this function cannot fail. @@ -140,7 +182,10 @@ pub fn ArrayHashMap( /// the `Entry` pointer points to it. Caller should then initialize /// the value (but not the key). pub fn getOrPut(self: *Self, key: K) !GetOrPutResult { - return self.unmanaged.getOrPut(self.allocator, key); + return self.unmanaged.getOrPutContext(self.allocator, key, self.ctx); + } + pub fn getOrPutAdapted(self: *Self, key: anytype, ctx: anytype) !GetOrPutResult { + return self.unmanaged.getOrPutContextAdapted(key, ctx, self.ctx); } /// If there is an existing item with `key`, then the result @@ -151,11 +196,13 @@ pub fn ArrayHashMap( /// If a new entry needs to be stored, this function asserts there /// is enough capacity to store it. pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { - return self.unmanaged.getOrPutAssumeCapacity(key); + return self.unmanaged.getOrPutAssumeCapacityContext(key, self.ctx); } - - pub fn getOrPutValue(self: *Self, key: K, value: V) !*Entry { - return self.unmanaged.getOrPutValue(self.allocator, key, value); + pub fn getOrPutAssumeCapacityAdapted(self: *Self, key: anytype, ctx: anytype) GetOrPutResult { + return self.unmanaged.getOrPutAssumeCapacityAdapted(key, ctx); + } + pub fn getOrPutValue(self: *Self, key: K, value: V) !GetOrPutResult { + return self.unmanaged.getOrPutValueContext(self.allocator, key, value, self.ctx); } /// Deprecated: call `ensureUnusedCapacity` or `ensureTotalCapacity`. @@ -164,14 +211,14 @@ pub fn ArrayHashMap( /// Increases capacity, guaranteeing that insertions up until the /// `expected_count` will not cause an allocation, and therefore cannot fail. pub fn ensureTotalCapacity(self: *Self, new_capacity: usize) !void { - return self.unmanaged.ensureTotalCapacity(self.allocator, new_capacity); + return self.unmanaged.ensureTotalCapacityContext(self.allocator, new_capacity, self.ctx); } /// Increases capacity, guaranteeing that insertions up until /// `additional_count` **more** items will not cause an allocation, and /// therefore cannot fail. pub fn ensureUnusedCapacity(self: *Self, additional_count: usize) !void { - return self.unmanaged.ensureUnusedCapacity(self.allocator, additional_count); + return self.unmanaged.ensureUnusedCapacityContext(self.allocator, additional_count, self.ctx); } /// Returns the number of total elements which may be present before it is @@ -183,119 +230,187 @@ pub fn ArrayHashMap( /// Clobbers any existing data. To detect if a put would clobber /// existing data, see `getOrPut`. pub fn put(self: *Self, key: K, value: V) !void { - return self.unmanaged.put(self.allocator, key, value); + return self.unmanaged.putContext(self.allocator, key, value, self.ctx); } /// Inserts a key-value pair into the hash map, asserting that no previous /// entry with the same key is already present pub fn putNoClobber(self: *Self, key: K, value: V) !void { - return self.unmanaged.putNoClobber(self.allocator, key, value); + return self.unmanaged.putNoClobberContext(self.allocator, key, value, self.ctx); } /// Asserts there is enough capacity to store the new key-value pair. /// Clobbers any existing data. To detect if a put would clobber /// existing data, see `getOrPutAssumeCapacity`. pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { - return self.unmanaged.putAssumeCapacity(key, value); + return self.unmanaged.putAssumeCapacityContext(key, value, self.ctx); } /// Asserts there is enough capacity to store the new key-value pair. /// Asserts that it does not clobber any existing data. /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`. pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { - return self.unmanaged.putAssumeCapacityNoClobber(key, value); + return self.unmanaged.putAssumeCapacityNoClobberContext(key, value, self.ctx); } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. - pub fn fetchPut(self: *Self, key: K, value: V) !?Entry { - return self.unmanaged.fetchPut(self.allocator, key, value); + pub fn fetchPut(self: *Self, key: K, value: V) !?KV { + return self.unmanaged.fetchPutContext(self.allocator, key, value, self.ctx); } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. /// If insertion happuns, asserts there is enough capacity without allocating. - pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { - return self.unmanaged.fetchPutAssumeCapacity(key, value); + pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?KV { + return self.unmanaged.fetchPutAssumeCapacityContext(key, value, self.ctx); } - pub fn getEntry(self: Self, key: K) ?*Entry { - return self.unmanaged.getEntry(key); + /// Finds pointers to the key and value storage associated with a key. + pub fn getEntry(self: Self, key: K) ?Entry { + return self.unmanaged.getEntryContext(key, self.ctx); + } + pub fn getEntryAdapted(self: Self, key: anytype, ctx: anytype) ?Entry { + return self.unmanaged.getEntryAdapted(key, ctx); } + /// Finds the index in the `entries` array where a key is stored pub fn getIndex(self: Self, key: K) ?usize { - return self.unmanaged.getIndex(key); + return self.unmanaged.getIndexContext(key, self.ctx); + } + pub fn getIndexAdapted(self: Self, key: anytype, ctx: anytype) ?usize { + return self.unmanaged.getIndexAdapted(key, ctx); } + /// Find the value associated with a key pub fn get(self: Self, key: K) ?V { - return self.unmanaged.get(key); + return self.unmanaged.getContext(key, self.ctx); + } + pub fn getAdapted(self: Self, key: anytype, ctx: anytype) ?V { + return self.unmanaged.getAdapted(key, ctx); } + /// Find a pointer to the value associated with a key + pub fn getPtr(self: Self, key: K) ?*V { + return self.unmanaged.getPtrContext(key, self.ctx); + } + pub fn getPtrAdapted(self: Self, key: anytype, ctx: anytype) ?*V { + return self.unmanaged.getPtrAdapted(key, ctx); + } + + /// Check whether a key is stored in the map pub fn contains(self: Self, key: K) bool { - return self.unmanaged.contains(key); + return self.unmanaged.containsContext(key, self.ctx); + } + pub fn containsAdapted(self: Self, key: anytype, ctx: anytype) bool { + return self.unmanaged.containsAdapted(key, ctx); } /// If there is an `Entry` with a matching key, it is deleted from /// the hash map, and then returned from this function. The entry is /// removed from the underlying array by swapping it with the last /// element. - pub fn swapRemove(self: *Self, key: K) ?Entry { - return self.unmanaged.swapRemove(key); + pub fn fetchSwapRemove(self: *Self, key: K) ?KV { + return self.unmanaged.fetchSwapRemoveContext(key, self.ctx); + } + pub fn fetchSwapRemoveAdapted(self: *Self, key: anytype, ctx: anytype) ?KV { + return self.unmanaged.fetchSwapRemoveContextAdapted(key, ctx, self.ctx); } /// If there is an `Entry` with a matching key, it is deleted from /// the hash map, and then returned from this function. The entry is /// removed from the underlying array by shifting all elements forward /// thereby maintaining the current ordering. - pub fn orderedRemove(self: *Self, key: K) ?Entry { - return self.unmanaged.orderedRemove(key); + pub fn fetchOrderedRemove(self: *Self, key: K) ?KV { + return self.unmanaged.fetchOrderedRemoveContext(key, self.ctx); + } + pub fn fetchOrderedRemoveAdapted(self: *Self, key: anytype, ctx: anytype) ?KV { + return self.unmanaged.fetchOrderedRemoveContextAdapted(key, ctx, self.ctx); } - /// TODO: deprecated: call swapRemoveAssertDiscard instead. - pub fn removeAssertDiscard(self: *Self, key: K) void { - return self.unmanaged.removeAssertDiscard(key); + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map. The entry is removed from the underlying array + /// by swapping it with the last element. Returns true if an entry + /// was removed, false otherwise. + pub fn swapRemove(self: *Self, key: K) bool { + return self.unmanaged.swapRemoveContext(key, self.ctx); + } + pub fn swapRemoveAdapted(self: *Self, key: anytype, ctx: anytype) bool { + return self.unmanaged.swapRemoveContextAdapted(key, ctx, self.ctx); } - /// Asserts there is an `Entry` with matching key, deletes it from the hash map - /// by swapping it with the last element, and discards it. - pub fn swapRemoveAssertDiscard(self: *Self, key: K) void { - return self.unmanaged.swapRemoveAssertDiscard(key); + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map. The entry is removed from the underlying array + /// by shifting all elements forward, thereby maintaining the + /// current ordering. Returns true if an entry was removed, false otherwise. + pub fn orderedRemove(self: *Self, key: K) bool { + return self.unmanaged.orderedRemoveContext(key, self.ctx); + } + pub fn orderedRemoveAdapted(self: *Self, key: anytype, ctx: anytype) bool { + return self.unmanaged.orderedRemoveContextAdapted(key, ctx, self.ctx); } - /// Asserts there is an `Entry` with matching key, deletes it from the hash map - /// by by shifting all elements forward thereby maintaining the current ordering. - pub fn orderedRemoveAssertDiscard(self: *Self, key: K) void { - return self.unmanaged.orderedRemoveAssertDiscard(key); + /// Deletes the item at the specified index in `entries` from + /// the hash map. The entry is removed from the underlying array + /// by swapping it with the last element. + pub fn swapRemoveAt(self: *Self, index: usize) void { + self.unmanaged.swapRemoveAtContext(index, self.ctx); } - pub fn items(self: Self) []Entry { - return self.unmanaged.items(); + /// Deletes the item at the specified index in `entries` from + /// the hash map. The entry is removed from the underlying array + /// by shifting all elements forward, thereby maintaining the + /// current ordering. + pub fn orderedRemoveAt(self: *Self, index: usize) void { + self.unmanaged.orderedRemoveAtContext(index, self.ctx); } + /// Create a copy of the hash map which can be modified separately. + /// The copy uses the same context and allocator as this instance. pub fn clone(self: Self) !Self { - var other = try self.unmanaged.clone(self.allocator); - return other.promote(self.allocator); + var other = try self.unmanaged.cloneContext(self.allocator, self.ctx); + return other.promoteContext(self.allocator, self.ctx); + } + /// Create a copy of the hash map which can be modified separately. + /// The copy uses the same context as this instance, but the specified + /// allocator. + pub fn cloneWithAllocator(self: Self, allocator: *Allocator) !Self { + var other = try self.unmanaged.cloneContext(allocator, self.ctx); + return other.promoteContext(allocator, self.ctx); + } + /// Create a copy of the hash map which can be modified separately. + /// The copy uses the same allocator as this instance, but the + /// specified context. + pub fn cloneWithContext(self: Self, ctx: anytype) !ArrayHashMap(K, V, @TypeOf(ctx), store_hash) { + var other = try self.unmanaged.cloneContext(self.allocator, ctx); + return other.promoteContext(self.allocator, ctx); + } + /// Create a copy of the hash map which can be modified separately. + /// The copy uses the specified allocator and context. + pub fn cloneWithAllocatorAndContext(self: Self, allocator: *Allocator, ctx: anytype) !ArrayHashMap(K, V, @TypeOf(ctx), store_hash) { + var other = try self.unmanaged.cloneContext(allocator, ctx); + return other.promoteContext(allocator, ctx); } /// Rebuilds the key indexes. If the underlying entries has been modified directly, users /// can call `reIndex` to update the indexes to account for these new entries. pub fn reIndex(self: *Self) !void { - return self.unmanaged.reIndex(self.allocator); + return self.unmanaged.reIndexContext(self.allocator, self.ctx); } /// Shrinks the underlying `Entry` array to `new_len` elements and discards any associated /// index entries. Keeps capacity the same. pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void { - return self.unmanaged.shrinkRetainingCapacity(new_len); + return self.unmanaged.shrinkRetainingCapacityContext(new_len, self.ctx); } /// Shrinks the underlying `Entry` array to `new_len` elements and discards any associated /// index entries. Reduces allocated capacity. pub fn shrinkAndFree(self: *Self, new_len: usize) void { - return self.unmanaged.shrinkAndFree(self.allocator, new_len); + return self.unmanaged.shrinkAndFreeContext(self.allocator, new_len, self.ctx); } /// Removes the last inserted `Entry` in the hash map and returns it. - pub fn pop(self: *Self) Entry { - return self.unmanaged.pop(); + pub fn pop(self: *Self) KV { + return self.unmanaged.popContext(self.ctx); } }; } @@ -317,16 +432,23 @@ pub fn ArrayHashMap( /// functions. It does not store each item's hash in the table. Setting `store_hash` /// to `true` incurs slightly more memory cost by storing each key's hash in the table /// but guarantees only one call to `eql` per insertion/deletion. +/// Context must be a struct type with two member functions: +/// hash(self, K) u32 +/// eql(self, K, K) bool +/// Adapted variants of many functions are provided. These variants +/// take a pseudo key instead of a key. Their context must have the functions: +/// hash(self, PseudoKey) u32 +/// eql(self, PseudoKey, K) bool pub fn ArrayHashMapUnmanaged( comptime K: type, comptime V: type, - comptime hash: fn (key: K) u32, - comptime eql: fn (a: K, b: K) bool, + comptime Context: type, comptime store_hash: bool, ) type { + comptime std.hash_map.verifyContext(Context, K, K, u32); return struct { /// It is permitted to access this field directly. - entries: std.ArrayListUnmanaged(Entry) = .{}, + entries: DataList = .{}, /// When entries length is less than `linear_scan_max`, this remains `null`. /// Once entries length grows big enough, this field is allocated. There is @@ -334,26 +456,54 @@ pub fn ArrayHashMapUnmanaged( /// by how many total indexes there are. index_header: ?*IndexHeader = null, - /// Modifying the key is illegal behavior. + /// Modifying the key is allowed only if it does not change the hash. /// Modifying the value is allowed. /// Entry pointers become invalid whenever this ArrayHashMap is modified, /// unless `ensureCapacity` was previously used. pub const Entry = struct { - /// This field is `void` if `store_hash` is `false`. + key_ptr: *K, + value_ptr: *V, + }; + + /// A KV pair which has been copied out of the backing store + pub const KV = struct { + key: K, + value: V, + }; + + /// The Data type used for the MultiArrayList backing this map + pub const Data = struct { hash: Hash, key: K, value: V, }; + /// The MultiArrayList type backing this map + pub const DataList = std.MultiArrayList(Data); + + /// The stored hash type, either u32 or void. pub const Hash = if (store_hash) u32 else void; + /// getOrPut variants return this structure, with pointers + /// to the backing store and a flag to indicate whether an + /// existing entry was found. + /// Modifying the key is allowed only if it does not change the hash. + /// Modifying the value is allowed. + /// Entry pointers become invalid whenever this ArrayHashMap is modified, + /// unless `ensureCapacity` was previously used. pub const GetOrPutResult = struct { - entry: *Entry, + key_ptr: *K, + value_ptr: *V, found_existing: bool, index: usize, }; - pub const Managed = ArrayHashMap(K, V, hash, eql, store_hash); + /// The ArrayHashMap type using the same settings as this managed map. + pub const Managed = ArrayHashMap(K, V, Context, store_hash); + + /// Some functions require a context only if hashes are not stored. + /// To keep the api simple, this type is only used internally. + const ByIndexContext = if (store_hash) void else Context; const Self = @This(); @@ -362,25 +512,26 @@ pub fn ArrayHashMapUnmanaged( const RemovalType = enum { swap, ordered, - index_only, }; + /// Convert from an unmanaged map to a managed map. After calling this, + /// the promoted map should no longer be used. pub fn promote(self: Self, allocator: *Allocator) Managed { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call promoteContext instead."); + return self.promoteContext(allocator, undefined); + } + pub fn promoteContext(self: Self, allocator: *Allocator, ctx: Context) Managed { return .{ .unmanaged = self, .allocator = allocator, + .ctx = ctx, }; } - /// `ArrayHashMapUnmanaged` takes ownership of the passed in array list. The array list must - /// have been allocated with `allocator`. - /// Deinitialize with `deinit`. - pub fn fromOwnedArrayList(allocator: *Allocator, entries: std.ArrayListUnmanaged(Entry)) !Self { - var array_hash_map = Self{ .entries = entries }; - try array_hash_map.reIndex(allocator); - return array_hash_map; - } - + /// Frees the backing allocation and leaves the map in an undefined state. + /// Note that this does not free keys or values. You must take care of that + /// before calling this function, if it is needed. pub fn deinit(self: *Self, allocator: *Allocator) void { self.entries.deinit(allocator); if (self.index_header) |header| { @@ -389,19 +540,19 @@ pub fn ArrayHashMapUnmanaged( self.* = undefined; } + /// Clears the map but retains the backing allocation for future use. pub fn clearRetainingCapacity(self: *Self) void { - self.entries.items.len = 0; + self.entries.len = 0; if (self.index_header) |header| { - header.max_distance_from_start_index = 0; switch (header.capacityIndexType()) { .u8 => mem.set(Index(u8), header.indexes(u8), Index(u8).empty), .u16 => mem.set(Index(u16), header.indexes(u16), Index(u16).empty), .u32 => mem.set(Index(u32), header.indexes(u32), Index(u32).empty), - .usize => mem.set(Index(usize), header.indexes(usize), Index(usize).empty), } } } + /// Clears the map and releases the backing allocation pub fn clearAndFree(self: *Self, allocator: *Allocator) void { self.entries.shrinkAndFree(allocator, 0); if (self.index_header) |header| { @@ -410,9 +561,54 @@ pub fn ArrayHashMapUnmanaged( } } + /// Returns the number of KV pairs stored in this map. pub fn count(self: Self) usize { - return self.entries.items.len; + return self.entries.len; + } + + /// Returns the backing array of keys in this map. + /// Modifying the map may invalidate this array. + pub fn keys(self: Self) []K { + return self.entries.items(.key); + } + /// Returns the backing array of values in this map. + /// Modifying the map may invalidate this array. + pub fn values(self: Self) []V { + return self.entries.items(.value); + } + + /// Returns an iterator over the pairs in this map. + /// Modifying the map may invalidate this iterator. + pub fn iterator(self: Self) Iterator { + const slice = self.entries.slice(); + return .{ + .keys = slice.items(.key).ptr, + .values = slice.items(.value).ptr, + .len = @intCast(u32, slice.len), + }; } + pub const Iterator = struct { + keys: [*]K, + values: [*]V, + len: u32, + index: u32 = 0, + + pub fn next(it: *Iterator) ?Entry { + if (it.index >= it.len) return null; + const result = Entry{ + .key_ptr = &it.keys[it.index], + // workaround for #6974 + .value_ptr = if (@sizeOf(*V) == 0) undefined else &it.values[it.index], + }; + it.index += 1; + return result; + } + + /// Reset the iterator to the initial index + pub fn reset(it: *Iterator) void { + it.index = 0; + } + }; /// If key exists this function cannot fail. /// If there is an existing item with `key`, then the result @@ -421,16 +617,36 @@ pub fn ArrayHashMapUnmanaged( /// the `Entry` pointer points to it. Caller should then initialize /// the value (but not the key). pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult { - self.ensureCapacity(allocator, self.entries.items.len + 1) catch |err| { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getOrPutContext instead."); + return self.getOrPutContext(allocator, key, undefined); + } + pub fn getOrPutContext(self: *Self, allocator: *Allocator, key: K, ctx: Context) !GetOrPutResult { + const gop = try self.getOrPutContextAdapted(allocator, key, ctx, ctx); + if (!gop.found_existing) { + gop.key_ptr.* = key; + } + return gop; + } + pub fn getOrPutAdapted(self: *Self, allocator: *Allocator, key: anytype, key_ctx: anytype) !GetOrPutResult { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getOrPutContextAdapted instead."); + return self.getOrPutContextAdapted(allocator, key, key_ctx, undefined); + } + pub fn getOrPutContextAdapted(self: *Self, allocator: *Allocator, key: anytype, key_ctx: anytype, ctx: Context) !GetOrPutResult { + self.ensureTotalCapacityContext(allocator, self.entries.len + 1, ctx) catch |err| { // "If key exists this function cannot fail." - const index = self.getIndex(key) orelse return err; + const index = self.getIndexAdapted(key, key_ctx) orelse return err; + const slice = self.entries.slice(); return GetOrPutResult{ - .entry = &self.entries.items[index], + .key_ptr = &slice.items(.key)[index], + // workaround for #6974 + .value_ptr = if (@sizeOf(*V) == 0) undefined else &slice.items(.value)[index], .found_existing = true, .index = index, }; }; - return self.getOrPutAssumeCapacity(key); + return self.getOrPutAssumeCapacityAdapted(key, key_ctx); } /// If there is an existing item with `key`, then the result @@ -441,45 +657,75 @@ pub fn ArrayHashMapUnmanaged( /// If a new entry needs to be stored, this function asserts there /// is enough capacity to store it. pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getOrPutAssumeCapacityContext instead."); + return self.getOrPutAssumeCapacityContext(key, undefined); + } + pub fn getOrPutAssumeCapacityContext(self: *Self, key: K, ctx: Context) GetOrPutResult { + const gop = self.getOrPutAssumeCapacityAdapted(key, ctx); + if (!gop.found_existing) { + gop.key_ptr.* = key; + } + return gop; + } + /// If there is an existing item with `key`, then the result + /// `Entry` pointers point to it, and found_existing is true. + /// Otherwise, puts a new item with undefined key and value, and + /// the `Entry` pointers point to it. Caller must then initialize + /// both the key and the value. + /// If a new entry needs to be stored, this function asserts there + /// is enough capacity to store it. + pub fn getOrPutAssumeCapacityAdapted(self: *Self, key: anytype, ctx: anytype) GetOrPutResult { const header = self.index_header orelse { // Linear scan. - const h = if (store_hash) hash(key) else {}; - for (self.entries.items) |*item, i| { - if (item.hash == h and eql(key, item.key)) { + const h = if (store_hash) checkedHash(ctx, key) else {}; + const slice = self.entries.slice(); + const hashes_array = slice.items(.hash); + const keys_array = slice.items(.key); + for (keys_array) |*item_key, i| { + if (hashes_array[i] == h and checkedEql(ctx, key, item_key.*)) { return GetOrPutResult{ - .entry = item, + .key_ptr = item_key, + // workaround for #6974 + .value_ptr = if (@sizeOf(*V) == 0) undefined else &slice.items(.value)[i], .found_existing = true, .index = i, }; } } - const new_entry = self.entries.addOneAssumeCapacity(); - new_entry.* = .{ - .hash = if (store_hash) h else {}, - .key = key, - .value = undefined, - }; + + const index = self.entries.addOneAssumeCapacity(); + // unsafe indexing because the length changed + if (store_hash) hashes_array.ptr[index] = h; + return GetOrPutResult{ - .entry = new_entry, + .key_ptr = &keys_array.ptr[index], + // workaround for #6974 + .value_ptr = if (@sizeOf(*V) == 0) undefined else &slice.items(.value).ptr[index], .found_existing = false, - .index = self.entries.items.len - 1, + .index = index, }; }; switch (header.capacityIndexType()) { - .u8 => return self.getOrPutInternal(key, header, u8), - .u16 => return self.getOrPutInternal(key, header, u16), - .u32 => return self.getOrPutInternal(key, header, u32), - .usize => return self.getOrPutInternal(key, header, usize), + .u8 => return self.getOrPutInternal(key, ctx, header, u8), + .u16 => return self.getOrPutInternal(key, ctx, header, u16), + .u32 => return self.getOrPutInternal(key, ctx, header, u32), } } - pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry { - const res = try self.getOrPut(allocator, key); - if (!res.found_existing) - res.entry.value = value; - - return res.entry; + pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !GetOrPutResult { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getOrPutValueContext instead."); + return self.getOrPutValueContext(allocator, key, value, undefined); + } + pub fn getOrPutValueContext(self: *Self, allocator: *Allocator, key: K, value: V, ctx: Context) !GetOrPutResult { + const res = try self.getOrPutContextAdapted(allocator, key, ctx, ctx); + if (!res.found_existing) { + res.key_ptr.* = key; + res.value_ptr.* = value; + } + return res; } /// Deprecated: call `ensureUnusedCapacity` or `ensureTotalCapacity`. @@ -488,30 +734,30 @@ pub fn ArrayHashMapUnmanaged( /// Increases capacity, guaranteeing that insertions up until the /// `expected_count` will not cause an allocation, and therefore cannot fail. pub fn ensureTotalCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void { - try self.entries.ensureTotalCapacity(allocator, new_capacity); - if (new_capacity <= linear_scan_max) return; + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call ensureTotalCapacityContext instead."); + return self.ensureTotalCapacityContext(allocator, new_capacity, undefined); + } + pub fn ensureTotalCapacityContext(self: *Self, allocator: *Allocator, new_capacity: usize, ctx: Context) !void { + if (new_capacity <= linear_scan_max) { + try self.entries.ensureCapacity(allocator, new_capacity); + return; + } - // Ensure that the indexes will be at most 60% full if - // `new_capacity` items are put into it. - const needed_len = new_capacity * 5 / 3; if (self.index_header) |header| { - if (needed_len > header.indexes_len) { - // An overflow here would mean the amount of memory required would not - // be representable in the address space. - const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; - const new_header = try IndexHeader.alloc(allocator, new_indexes_len); - self.insertAllEntriesIntoNewHeader(new_header); - header.free(allocator); - self.index_header = new_header; + if (new_capacity <= header.capacity()) { + try self.entries.ensureCapacity(allocator, new_capacity); + return; } - } else { - // An overflow here would mean the amount of memory required would not - // be representable in the address space. - const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; - const header = try IndexHeader.alloc(allocator, new_indexes_len); - self.insertAllEntriesIntoNewHeader(header); - self.index_header = header; } + + const new_bit_index = try IndexHeader.findBitIndex(new_capacity); + const new_header = try IndexHeader.alloc(allocator, new_bit_index); + try self.entries.ensureCapacity(allocator, new_capacity); + + if (self.index_header) |old_header| old_header.free(allocator); + self.insertAllEntriesIntoNewHeader(if (store_hash) {} else ctx, new_header); + self.index_header = new_header; } /// Increases capacity, guaranteeing that insertions up until @@ -522,7 +768,17 @@ pub fn ArrayHashMapUnmanaged( allocator: *Allocator, additional_capacity: usize, ) !void { - return self.ensureTotalCapacity(allocator, self.count() + additional_capacity); + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call ensureTotalCapacityContext instead."); + return self.ensureUnusedCapacityContext(allocator, additional_capacity, undefined); + } + pub fn ensureUnusedCapacityContext( + self: *Self, + allocator: *Allocator, + additional_capacity: usize, + ctx: Context, + ) !void { + return self.ensureTotalCapacityContext(allocator, self.count() + additional_capacity, ctx); } /// Returns the number of total elements which may be present before it is @@ -530,141 +786,321 @@ pub fn ArrayHashMapUnmanaged( pub fn capacity(self: Self) usize { const entry_cap = self.entries.capacity; const header = self.index_header orelse return math.min(linear_scan_max, entry_cap); - const indexes_cap = (header.indexes_len + 1) * 3 / 4; + const indexes_cap = header.capacity(); return math.min(entry_cap, indexes_cap); } /// Clobbers any existing data. To detect if a put would clobber /// existing data, see `getOrPut`. pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void { - const result = try self.getOrPut(allocator, key); - result.entry.value = value; + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call putContext instead."); + return self.putContext(allocator, key, value, undefined); + } + pub fn putContext(self: *Self, allocator: *Allocator, key: K, value: V, ctx: Context) !void { + const result = try self.getOrPutContext(allocator, key, ctx); + result.value_ptr.* = value; } /// Inserts a key-value pair into the hash map, asserting that no previous /// entry with the same key is already present pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void { - const result = try self.getOrPut(allocator, key); + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call putNoClobberContext instead."); + return self.putNoClobberContext(allocator, key, value, undefined); + } + pub fn putNoClobberContext(self: *Self, allocator: *Allocator, key: K, value: V, ctx: Context) !void { + const result = try self.getOrPutContext(allocator, key, ctx); assert(!result.found_existing); - result.entry.value = value; + result.value_ptr.* = value; } /// Asserts there is enough capacity to store the new key-value pair. /// Clobbers any existing data. To detect if a put would clobber /// existing data, see `getOrPutAssumeCapacity`. pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { - const result = self.getOrPutAssumeCapacity(key); - result.entry.value = value; + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call putAssumeCapacityContext instead."); + return self.putAssumeCapacityContext(key, value, undefined); + } + pub fn putAssumeCapacityContext(self: *Self, key: K, value: V, ctx: Context) void { + const result = self.getOrPutAssumeCapacityContext(key, ctx); + result.value_ptr.* = value; } /// Asserts there is enough capacity to store the new key-value pair. /// Asserts that it does not clobber any existing data. /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`. pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { - const result = self.getOrPutAssumeCapacity(key); + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call putAssumeCapacityNoClobberContext instead."); + return self.putAssumeCapacityNoClobberContext(key, value, undefined); + } + pub fn putAssumeCapacityNoClobberContext(self: *Self, key: K, value: V, ctx: Context) void { + const result = self.getOrPutAssumeCapacityContext(key, ctx); assert(!result.found_existing); - result.entry.value = value; + result.value_ptr.* = value; } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. - pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?Entry { - const gop = try self.getOrPut(allocator, key); - var result: ?Entry = null; + pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?KV { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call fetchPutContext instead."); + return self.fetchPutContext(allocator, key, value, undefined); + } + pub fn fetchPutContext(self: *Self, allocator: *Allocator, key: K, value: V, ctx: Context) !?KV { + const gop = try self.getOrPutContext(allocator, key, ctx); + var result: ?KV = null; if (gop.found_existing) { - result = gop.entry.*; + result = KV{ + .key = gop.key_ptr.*, + .value = gop.value_ptr.*, + }; } - gop.entry.value = value; + gop.value_ptr.* = value; return result; } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. /// If insertion happens, asserts there is enough capacity without allocating. - pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { - const gop = self.getOrPutAssumeCapacity(key); - var result: ?Entry = null; + pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?KV { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call fetchPutAssumeCapacityContext instead."); + return self.fetchPutAssumeCapacityContext(key, value, undefined); + } + pub fn fetchPutAssumeCapacityContext(self: *Self, key: K, value: V, ctx: Context) ?KV { + const gop = self.getOrPutAssumeCapacityContext(key, ctx); + var result: ?KV = null; if (gop.found_existing) { - result = gop.entry.*; + result = KV{ + .key = gop.key_ptr.*, + .value = gop.value_ptr.*, + }; } - gop.entry.value = value; + gop.value_ptr.* = value; return result; } - pub fn getEntry(self: Self, key: K) ?*Entry { - const index = self.getIndex(key) orelse return null; - return &self.entries.items[index]; + /// Finds pointers to the key and value storage associated with a key. + pub fn getEntry(self: Self, key: K) ?Entry { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getEntryContext instead."); + return self.getEntryContext(key, undefined); + } + pub fn getEntryContext(self: Self, key: K, ctx: Context) ?Entry { + return self.getEntryAdapted(key, ctx); + } + pub fn getEntryAdapted(self: Self, key: anytype, ctx: anytype) ?Entry { + const index = self.getIndexAdapted(key, ctx) orelse return null; + const slice = self.entries.slice(); + return Entry{ + .key_ptr = &slice.items(.key)[index], + // workaround for #6974 + .value_ptr = if (@sizeOf(*V) == 0) undefined else &slice.items(.value)[index], + }; } + /// Finds the index in the `entries` array where a key is stored pub fn getIndex(self: Self, key: K) ?usize { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getIndexContext instead."); + return self.getIndexContext(key, undefined); + } + pub fn getIndexContext(self: Self, key: K, ctx: Context) ?usize { + return self.getIndexAdapted(key, ctx); + } + pub fn getIndexAdapted(self: Self, key: anytype, ctx: anytype) ?usize { const header = self.index_header orelse { // Linear scan. - const h = if (store_hash) hash(key) else {}; - for (self.entries.items) |*item, i| { - if (item.hash == h and eql(key, item.key)) { + const h = if (store_hash) checkedHash(ctx, key) else {}; + const slice = self.entries.slice(); + const hashes_array = slice.items(.hash); + const keys_array = slice.items(.key); + for (keys_array) |*item_key, i| { + if (hashes_array[i] == h and checkedEql(ctx, key, item_key.*)) { return i; } } return null; }; switch (header.capacityIndexType()) { - .u8 => return self.getInternal(key, header, u8), - .u16 => return self.getInternal(key, header, u16), - .u32 => return self.getInternal(key, header, u32), - .usize => return self.getInternal(key, header, usize), + .u8 => return self.getIndexWithHeaderGeneric(key, ctx, header, u8), + .u16 => return self.getIndexWithHeaderGeneric(key, ctx, header, u16), + .u32 => return self.getIndexWithHeaderGeneric(key, ctx, header, u32), } } + fn getIndexWithHeaderGeneric(self: Self, key: anytype, ctx: anytype, header: *IndexHeader, comptime I: type) ?usize { + const indexes = header.indexes(I); + const slot = self.getSlotByKey(key, ctx, header, I, indexes) orelse return null; + return indexes[slot].entry_index; + } + /// Find the value associated with a key pub fn get(self: Self, key: K) ?V { - return if (self.getEntry(key)) |entry| entry.value else null; + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getContext instead."); + return self.getContext(key, undefined); + } + pub fn getContext(self: Self, key: K, ctx: Context) ?V { + return self.getAdapted(key, ctx); + } + pub fn getAdapted(self: Self, key: anytype, ctx: anytype) ?V { + const index = self.getIndexAdapted(key, ctx) orelse return null; + return self.values()[index]; + } + + /// Find a pointer to the value associated with a key + pub fn getPtr(self: Self, key: K) ?*V { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getPtrContext instead."); + return self.getPtrContext(key, undefined); + } + pub fn getPtrContext(self: Self, key: K, ctx: Context) ?*V { + return self.getPtrAdapted(key, ctx); + } + pub fn getPtrAdapted(self: Self, key: anytype, ctx: anytype) ?*V { + const index = self.getIndexAdapted(key, ctx) orelse return null; + // workaround for #6974 + return if (@sizeOf(*V) == 0) @as(*V, undefined) else &self.values()[index]; } + /// Check whether a key is stored in the map pub fn contains(self: Self, key: K) bool { - return self.getEntry(key) != null; + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call containsContext instead."); + return self.containsContext(key, undefined); + } + pub fn containsContext(self: Self, key: K, ctx: Context) bool { + return self.containsAdapted(key, ctx); + } + pub fn containsAdapted(self: Self, key: anytype, ctx: anytype) bool { + return self.getIndexAdapted(key, ctx) != null; } /// If there is an `Entry` with a matching key, it is deleted from /// the hash map, and then returned from this function. The entry is /// removed from the underlying array by swapping it with the last /// element. - pub fn swapRemove(self: *Self, key: K) ?Entry { - return self.removeInternal(key, .swap); + pub fn fetchSwapRemove(self: *Self, key: K) ?KV { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call fetchSwapRemoveContext instead."); + return self.fetchSwapRemoveContext(key, undefined); + } + pub fn fetchSwapRemoveContext(self: *Self, key: K, ctx: Context) ?KV { + return self.fetchSwapRemoveContextAdapted(key, ctx, ctx); + } + pub fn fetchSwapRemoveAdapted(self: *Self, key: anytype, ctx: anytype) ?KV { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call fetchSwapRemoveContextAdapted instead."); + return self.fetchSwapRemoveContextAdapted(key, ctx, undefined); + } + pub fn fetchSwapRemoveContextAdapted(self: *Self, key: anytype, key_ctx: anytype, ctx: Context) ?KV { + return self.fetchRemoveByKey(key, key_ctx, if (store_hash) {} else ctx, .swap); } /// If there is an `Entry` with a matching key, it is deleted from /// the hash map, and then returned from this function. The entry is /// removed from the underlying array by shifting all elements forward /// thereby maintaining the current ordering. - pub fn orderedRemove(self: *Self, key: K) ?Entry { - return self.removeInternal(key, .ordered); + pub fn fetchOrderedRemove(self: *Self, key: K) ?KV { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call fetchOrderedRemoveContext instead."); + return self.fetchOrderedRemoveContext(key, undefined); + } + pub fn fetchOrderedRemoveContext(self: *Self, key: K, ctx: Context) ?KV { + return self.fetchOrderedRemoveContextAdapted(key, ctx, ctx); + } + pub fn fetchOrderedRemoveAdapted(self: *Self, key: anytype, ctx: anytype) ?KV { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call fetchOrderedRemoveContextAdapted instead."); + return self.fetchOrderedRemoveContextAdapted(key, ctx, undefined); + } + pub fn fetchOrderedRemoveContextAdapted(self: *Self, key: anytype, key_ctx: anytype, ctx: Context) ?KV { + return self.fetchRemoveByKey(key, key_ctx, if (store_hash) {} else ctx, .ordered); } - /// TODO deprecated: call swapRemoveAssertDiscard instead. - pub fn removeAssertDiscard(self: *Self, key: K) void { - return self.swapRemoveAssertDiscard(key); + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map. The entry is removed from the underlying array + /// by swapping it with the last element. Returns true if an entry + /// was removed, false otherwise. + pub fn swapRemove(self: *Self, key: K) bool { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call swapRemoveContext instead."); + return self.swapRemoveContext(key, undefined); + } + pub fn swapRemoveContext(self: *Self, key: K, ctx: Context) bool { + return self.swapRemoveContextAdapted(key, ctx, ctx); + } + pub fn swapRemoveAdapted(self: *Self, key: anytype, ctx: anytype) bool { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call swapRemoveContextAdapted instead."); + return self.swapRemoveContextAdapted(key, ctx, undefined); + } + pub fn swapRemoveContextAdapted(self: *Self, key: anytype, key_ctx: anytype, ctx: Context) bool { + return self.removeByKey(key, key_ctx, if (store_hash) {} else ctx, .swap); } - /// Asserts there is an `Entry` with matching key, deletes it from the hash map - /// by swapping it with the last element, and discards it. - pub fn swapRemoveAssertDiscard(self: *Self, key: K) void { - assert(self.swapRemove(key) != null); + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map. The entry is removed from the underlying array + /// by shifting all elements forward, thereby maintaining the + /// current ordering. Returns true if an entry was removed, false otherwise. + pub fn orderedRemove(self: *Self, key: K) bool { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call orderedRemoveContext instead."); + return self.orderedRemoveContext(key, undefined); + } + pub fn orderedRemoveContext(self: *Self, key: K, ctx: Context) bool { + return self.orderedRemoveContextAdapted(key, ctx, ctx); + } + pub fn orderedRemoveAdapted(self: *Self, key: anytype, ctx: anytype) bool { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call orderedRemoveContextAdapted instead."); + return self.orderedRemoveContextAdapted(key, ctx, undefined); + } + pub fn orderedRemoveContextAdapted(self: *Self, key: anytype, key_ctx: anytype, ctx: Context) bool { + return self.removeByKey(key, key_ctx, if (store_hash) {} else ctx, .ordered); } - /// Asserts there is an `Entry` with matching key, deletes it from the hash map - /// by by shifting all elements forward thereby maintaining the current ordering. - pub fn orderedRemoveAssertDiscard(self: *Self, key: K) void { - assert(self.orderedRemove(key) != null); + /// Deletes the item at the specified index in `entries` from + /// the hash map. The entry is removed from the underlying array + /// by swapping it with the last element. + pub fn swapRemoveAt(self: *Self, index: usize) void { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call swapRemoveAtContext instead."); + return self.swapRemoveAtContext(index, undefined); + } + pub fn swapRemoveAtContext(self: *Self, index: usize, ctx: Context) void { + self.removeByIndex(index, if (store_hash) {} else ctx, .swap); } - pub fn items(self: Self) []Entry { - return self.entries.items; + /// Deletes the item at the specified index in `entries` from + /// the hash map. The entry is removed from the underlying array + /// by shifting all elements forward, thereby maintaining the + /// current ordering. + pub fn orderedRemoveAt(self: *Self, index: usize) void { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call orderedRemoveAtContext instead."); + return self.orderedRemoveAtContext(index, undefined); + } + pub fn orderedRemoveAtContext(self: *Self, index: usize, ctx: Context) void { + self.removeByIndex(index, if (store_hash) {} else ctx, .ordered); } + /// Create a copy of the hash map which can be modified separately. + /// The copy uses the same context and allocator as this instance. pub fn clone(self: Self, allocator: *Allocator) !Self { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call cloneContext instead."); + return self.cloneContext(allocator, undefined); + } + pub fn cloneContext(self: Self, allocator: *Allocator, ctx: Context) !Self { var other: Self = .{}; - try other.entries.appendSlice(allocator, self.entries.items); + other.entries = try self.entries.clone(allocator); + errdefer other.entries.deinit(allocator); if (self.index_header) |header| { - const new_header = try IndexHeader.alloc(allocator, header.indexes_len); - other.insertAllEntriesIntoNewHeader(new_header); + const new_header = try IndexHeader.alloc(allocator, header.bit_index); + other.insertAllEntriesIntoNewHeader(if (store_hash) {} else ctx, new_header); other.index_header = new_header; } return other; @@ -673,135 +1109,197 @@ pub fn ArrayHashMapUnmanaged( /// Rebuilds the key indexes. If the underlying entries has been modified directly, users /// can call `reIndex` to update the indexes to account for these new entries. pub fn reIndex(self: *Self, allocator: *Allocator) !void { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call reIndexContext instead."); + return self.reIndexContext(allocator, undefined); + } + pub fn reIndexContext(self: *Self, allocator: *Allocator, ctx: Context) !void { if (self.entries.capacity <= linear_scan_max) return; // We're going to rebuild the index header and replace the existing one (if any). The // indexes should sized such that they will be at most 60% full. - const needed_len = self.entries.capacity * 5 / 3; - const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; - const new_header = try IndexHeader.alloc(allocator, new_indexes_len); - self.insertAllEntriesIntoNewHeader(new_header); - if (self.index_header) |header| - header.free(allocator); + const bit_index = try IndexHeader.findBitIndex(self.entries.capacity); + const new_header = try IndexHeader.alloc(allocator, bit_index); + if (self.index_header) |header| header.free(allocator); + self.insertAllEntriesIntoNewHeader(if (store_hash) {} else ctx, new_header); self.index_header = new_header; } /// Shrinks the underlying `Entry` array to `new_len` elements and discards any associated /// index entries. Keeps capacity the same. pub fn shrinkRetainingCapacity(self: *Self, new_len: usize) void { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call shrinkRetainingCapacityContext instead."); + return self.shrinkRetainingCapacityContext(new_len, undefined); + } + pub fn shrinkRetainingCapacityContext(self: *Self, new_len: usize, ctx: Context) void { // Remove index entries from the new length onwards. // Explicitly choose to ONLY remove index entries and not the underlying array list // entries as we're going to remove them in the subsequent shrink call. - var i: usize = new_len; - while (i < self.entries.items.len) : (i += 1) - _ = self.removeWithHash(self.entries.items[i].key, self.entries.items[i].hash, .index_only); + if (self.index_header) |header| { + var i: usize = new_len; + while (i < self.entries.len) : (i += 1) + self.removeFromIndexByIndex(i, if (store_hash) {} else ctx, header); + } self.entries.shrinkRetainingCapacity(new_len); } /// Shrinks the underlying `Entry` array to `new_len` elements and discards any associated /// index entries. Reduces allocated capacity. pub fn shrinkAndFree(self: *Self, allocator: *Allocator, new_len: usize) void { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call shrinkAndFreeContext instead."); + return self.shrinkAndFreeContext(allocator, new_len, undefined); + } + pub fn shrinkAndFreeContext(self: *Self, allocator: *Allocator, new_len: usize, ctx: Context) void { // Remove index entries from the new length onwards. // Explicitly choose to ONLY remove index entries and not the underlying array list // entries as we're going to remove them in the subsequent shrink call. - var i: usize = new_len; - while (i < self.entries.items.len) : (i += 1) - _ = self.removeWithHash(self.entries.items[i].key, self.entries.items[i].hash, .index_only); + if (self.index_header) |header| { + var i: usize = new_len; + while (i < self.entries.len) : (i += 1) + self.removeFromIndexByIndex(i, if (store_hash) {} else ctx, header); + } self.entries.shrinkAndFree(allocator, new_len); } /// Removes the last inserted `Entry` in the hash map and returns it. - pub fn pop(self: *Self) Entry { - const top = self.entries.items[self.entries.items.len - 1]; - _ = self.removeWithHash(top.key, top.hash, .index_only); - self.entries.items.len -= 1; - return top; + pub fn pop(self: *Self) KV { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call popContext instead."); + return self.popContext(undefined); } - - fn removeInternal(self: *Self, key: K, comptime removal_type: RemovalType) ?Entry { - const key_hash = if (store_hash) hash(key) else {}; - return self.removeWithHash(key, key_hash, removal_type); + pub fn popContext(self: *Self, ctx: Context) KV { + const item = self.entries.get(self.entries.len-1); + if (self.index_header) |header| + self.removeFromIndexByIndex(self.entries.len-1, if (store_hash) {} else ctx, header); + self.entries.len -= 1; + return .{ + .key = item.key, + .value = item.value, + }; } - fn removeWithHash(self: *Self, key: K, key_hash: Hash, comptime removal_type: RemovalType) ?Entry { + // ------------------ No pub fns below this point ------------------ + + fn fetchRemoveByKey(self: *Self, key: anytype, key_ctx: anytype, ctx: ByIndexContext, comptime removal_type: RemovalType) ?KV { const header = self.index_header orelse { - // If we're only removing index entries and we have no index header, there's no need - // to continue. - if (removal_type == .index_only) return null; // Linear scan. - for (self.entries.items) |item, i| { - if (item.hash == key_hash and eql(key, item.key)) { + const key_hash = if (store_hash) key_ctx.hash(key) else {}; + const slice = self.entries.slice(); + const hashes_array = if (store_hash) slice.items(.hash) else {}; + const keys_array = slice.items(.key); + for (keys_array) |*item_key, i| { + const hash_match = if (store_hash) hashes_array[i] == key_hash else true; + if (hash_match and key_ctx.eql(key, item_key.*)) { + const removed_entry: KV = .{ + .key = keys_array[i], + .value = slice.items(.value)[i], + }; switch (removal_type) { - .swap => return self.entries.swapRemove(i), - .ordered => return self.entries.orderedRemove(i), - .index_only => unreachable, + .swap => self.entries.swapRemove(i), + .ordered => self.entries.orderedRemove(i), } + return removed_entry; } } return null; }; - switch (header.capacityIndexType()) { - .u8 => return self.removeWithIndex(key, key_hash, header, u8, removal_type), - .u16 => return self.removeWithIndex(key, key_hash, header, u16, removal_type), - .u32 => return self.removeWithIndex(key, key_hash, header, u32, removal_type), - .usize => return self.removeWithIndex(key, key_hash, header, usize, removal_type), - } + return switch (header.capacityIndexType()) { + .u8 => self.fetchRemoveByKeyGeneric(key, key_ctx, ctx, header, u8, removal_type), + .u16 => self.fetchRemoveByKeyGeneric(key, key_ctx, ctx, header, u16, removal_type), + .u32 => self.fetchRemoveByKeyGeneric(key, key_ctx, ctx, header, u32, removal_type), + }; } - - fn removeWithIndex(self: *Self, key: K, key_hash: Hash, header: *IndexHeader, comptime I: type, comptime removal_type: RemovalType) ?Entry { + fn fetchRemoveByKeyGeneric(self: *Self, key: anytype, key_ctx: anytype, ctx: ByIndexContext, header: *IndexHeader, comptime I: type, comptime removal_type: RemovalType) ?KV { const indexes = header.indexes(I); - const h = if (store_hash) key_hash else hash(key); - const start_index = header.constrainIndex(h); - var roll_over: usize = 0; - while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { - const index_index = header.constrainIndex(start_index + roll_over); - var index = &indexes[index_index]; - if (index.isEmpty()) - return null; - - const entry = &self.entries.items[index.entry_index]; - - const hash_match = if (store_hash) h == entry.hash else true; - if (!hash_match or !eql(key, entry.key)) - continue; + const entry_index = self.removeFromIndexByKey(key, key_ctx, header, I, indexes) orelse return null; + const slice = self.entries.slice(); + const removed_entry: KV = .{ + .key = slice.items(.key)[entry_index], + .value = slice.items(.value)[entry_index], + }; + self.removeFromArrayAndUpdateIndex(entry_index, ctx, header, I, indexes, removal_type); + return removed_entry; + } - var removed_entry: ?Entry = undefined; - switch (removal_type) { - .swap => { - removed_entry = self.entries.swapRemove(index.entry_index); - if (self.entries.items.len > 0 and self.entries.items.len != index.entry_index) { - // Because of the swap remove, now we need to update the index that was - // pointing to the last entry and is now pointing to this removed item slot. - self.updateEntryIndex(header, self.entries.items.len, index.entry_index, I, indexes); - } - }, - .ordered => { - removed_entry = self.entries.orderedRemove(index.entry_index); - var i: usize = index.entry_index; - while (i < self.entries.items.len) : (i += 1) { - // Because of the ordered remove, everything from the entry index onwards has - // been shifted forward so we'll need to update the index entries. - self.updateEntryIndex(header, i + 1, i, I, indexes); + fn removeByKey(self: *Self, key: anytype, key_ctx: anytype, ctx: ByIndexContext, comptime removal_type: RemovalType) bool { + const header = self.index_header orelse { + // Linear scan. + const key_hash = if (store_hash) key_ctx.hash(key) else {}; + const slice = self.entries.slice(); + const hashes_array = if (store_hash) slice.items(.hash) else {}; + const keys_array = slice.items(.key); + for (keys_array) |*item_key, i| { + const hash_match = if (store_hash) hashes_array[i] == key_hash else true; + if (hash_match and key_ctx.eql(key, item_key.*)) { + switch (removal_type) { + .swap => self.entries.swapRemove(i), + .ordered => self.entries.orderedRemove(i), } - }, - .index_only => removed_entry = null, + return true; + } } + return false; + }; + return switch (header.capacityIndexType()) { + .u8 => self.removeByKeyGeneric(key, key_ctx, ctx, header, u8, removal_type), + .u16 => self.removeByKeyGeneric(key, key_ctx, ctx, header, u16, removal_type), + .u32 => self.removeByKeyGeneric(key, key_ctx, ctx, header, u32, removal_type), + }; + } + fn removeByKeyGeneric(self: *Self, key: anytype, key_ctx: anytype, ctx: ByIndexContext, header: *IndexHeader, comptime I: type, comptime removal_type: RemovalType) bool { + const indexes = header.indexes(I); + const entry_index = self.removeFromIndexByKey(key, key_ctx, header, I, indexes) orelse return false; + self.removeFromArrayAndUpdateIndex(entry_index, ctx, header, I, indexes, removal_type); + return true; + } - // Now we have to shift over the following indexes. - roll_over += 1; - while (roll_over < header.indexes_len) : (roll_over += 1) { - const next_index_index = header.constrainIndex(start_index + roll_over); - const next_index = &indexes[next_index_index]; - if (next_index.isEmpty() or next_index.distance_from_start_index == 0) { - index.setEmpty(); - return removed_entry; - } - index.* = next_index.*; - index.distance_from_start_index -= 1; - index = next_index; + fn removeByIndex(self: *Self, entry_index: usize, ctx: ByIndexContext, comptime removal_type: RemovalType) void { + assert(entry_index < self.entries.len); + const header = self.index_header orelse { + switch (removal_type) { + .swap => self.entries.swapRemove(entry_index), + .ordered => self.entries.orderedRemove(entry_index), } - unreachable; + return; + }; + switch (header.capacityIndexType()) { + .u8 => self.removeByIndexGeneric(entry_index, ctx, header, u8, removal_type), + .u16 => self.removeByIndexGeneric(entry_index, ctx, header, u16, removal_type), + .u32 => self.removeByIndexGeneric(entry_index, ctx, header, u32, removal_type), + } + } + fn removeByIndexGeneric(self: *Self, entry_index: usize, ctx: ByIndexContext, header: *IndexHeader, comptime I: type, comptime removal_type: RemovalType) void { + const indexes = header.indexes(I); + self.removeFromIndexByIndexGeneric(entry_index, ctx, header, I, indexes); + self.removeFromArrayAndUpdateIndex(entry_index, ctx, header, I, indexes, removal_type); + } + + fn removeFromArrayAndUpdateIndex(self: *Self, entry_index: usize, ctx: ByIndexContext, header: *IndexHeader, comptime I: type, indexes: []Index(I), comptime removal_type: RemovalType) void { + const last_index = self.entries.len-1; // overflow => remove from empty map + switch (removal_type) { + .swap => { + if (last_index != entry_index) { + // Because of the swap remove, now we need to update the index that was + // pointing to the last entry and is now pointing to this removed item slot. + self.updateEntryIndex(header, last_index, entry_index, ctx, I, indexes); + } + // updateEntryIndex reads from the old entry index, + // so it needs to run before removal. + self.entries.swapRemove(entry_index); + }, + .ordered => { + var i: usize = entry_index; + while (i < last_index) : (i += 1) { + // Because of the ordered remove, everything from the entry index onwards has + // been shifted forward so we'll need to update the index entries. + self.updateEntryIndex(header, i + 1, i, ctx, I, indexes); + } + // updateEntryIndex reads from the old entry index, + // so it needs to run before removal. + self.entries.orderedRemove(entry_index); + }, } - return null; } fn updateEntryIndex( @@ -809,116 +1307,188 @@ pub fn ArrayHashMapUnmanaged( header: *IndexHeader, old_entry_index: usize, new_entry_index: usize, + ctx: ByIndexContext, comptime I: type, indexes: []Index(I), ) void { - const h = if (store_hash) self.entries.items[new_entry_index].hash else hash(self.entries.items[new_entry_index].key); - const start_index = header.constrainIndex(h); - var roll_over: usize = 0; - while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { - const index_index = header.constrainIndex(start_index + roll_over); - const index = &indexes[index_index]; - if (index.entry_index == old_entry_index) { - index.entry_index = @intCast(I, new_entry_index); + const slot = self.getSlotByIndex(old_entry_index, ctx, header, I, indexes); + indexes[slot].entry_index = @intCast(I, new_entry_index); + } + + fn removeFromIndexByIndex(self: *Self, entry_index: usize, ctx: ByIndexContext, header: *IndexHeader) void { + switch (header.capacityIndexType()) { + .u8 => self.removeFromIndexByIndexGeneric(entry_index, ctx, header, u8, header.indexes(u8)), + .u16 => self.removeFromIndexByIndexGeneric(entry_index, ctx, header, u16, header.indexes(u16)), + .u32 => self.removeFromIndexByIndexGeneric(entry_index, ctx, header, u32, header.indexes(u32)), + } + } + fn removeFromIndexByIndexGeneric(self: *Self, entry_index: usize, ctx: ByIndexContext, header: *IndexHeader, comptime I: type, indexes: []Index(I)) void { + const slot = self.getSlotByIndex(entry_index, ctx, header, I, indexes); + self.removeSlot(slot, header, I, indexes); + } + + fn removeFromIndexByKey(self: *Self, key: anytype, ctx: anytype, header: *IndexHeader, comptime I: type, indexes: []Index(I)) ?usize { + const slot = self.getSlotByKey(key, ctx, header, I, indexes) orelse return null; + const removed_entry_index = indexes[slot].entry_index; + self.removeSlot(slot, header, I, indexes); + return removed_entry_index; + } + + fn removeSlot(self: *Self, removed_slot: usize, header: *IndexHeader, comptime I: type, indexes: []Index(I)) void { + const start_index = removed_slot +% 1; + const end_index = start_index +% indexes.len; + + var last_slot = removed_slot; + var index: usize = start_index; + while (index != end_index) : (index +%= 1) { + const slot = header.constrainIndex(index); + const slot_data = indexes[slot]; + if (slot_data.isEmpty() or slot_data.distance_from_start_index == 0) { + indexes[last_slot].setEmpty(); return; } + indexes[last_slot] = .{ + .entry_index = slot_data.entry_index, + .distance_from_start_index = slot_data.distance_from_start_index - 1, + }; + last_slot = slot; + } + unreachable; + } + + fn getSlotByIndex(self: *Self, entry_index: usize, ctx: ByIndexContext, header: *IndexHeader, comptime I: type, indexes: []Index(I)) usize { + const slice = self.entries.slice(); + const h = if (store_hash) slice.items(.hash)[entry_index] + else checkedHash(ctx, slice.items(.key)[entry_index]); + const start_index = safeTruncate(usize, h); + const end_index = start_index +% indexes.len; + + var index = start_index; + var distance_from_start_index: I = 0; + while (index != end_index) : ({ + index +%= 1; + distance_from_start_index += 1; + }) { + const slot = header.constrainIndex(index); + const slot_data = indexes[slot]; + + // This is the fundamental property of the array hash map index. If this + // assert fails, it probably means that the entry was not in the index. + assert(!slot_data.isEmpty()); + assert(slot_data.distance_from_start_index >= distance_from_start_index); + + if (slot_data.entry_index == entry_index) { + return slot; + } } unreachable; } /// Must ensureCapacity before calling this. - fn getOrPutInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) GetOrPutResult { + fn getOrPutInternal(self: *Self, key: anytype, ctx: anytype, header: *IndexHeader, comptime I: type) GetOrPutResult { + const slice = self.entries.slice(); + const hashes_array = if (store_hash) slice.items(.hash) else {}; + const keys_array = slice.items(.key); + const values_array = slice.items(.value); const indexes = header.indexes(I); - const h = hash(key); - const start_index = header.constrainIndex(h); - var roll_over: usize = 0; - var distance_from_start_index: usize = 0; - while (roll_over <= header.indexes_len) : ({ - roll_over += 1; + + const h = checkedHash(ctx, key); + const start_index = safeTruncate(usize, h); + const end_index = start_index +% indexes.len; + + var index = start_index; + var distance_from_start_index: I = 0; + while (index != end_index) : ({ + index +%= 1; distance_from_start_index += 1; }) { - const index_index = header.constrainIndex(start_index + roll_over); - const index = indexes[index_index]; - if (index.isEmpty()) { - indexes[index_index] = .{ - .distance_from_start_index = @intCast(I, distance_from_start_index), - .entry_index = @intCast(I, self.entries.items.len), - }; - header.maybeBumpMax(distance_from_start_index); - const new_entry = self.entries.addOneAssumeCapacity(); - new_entry.* = .{ - .hash = if (store_hash) h else {}, - .key = key, - .value = undefined, + var slot = header.constrainIndex(index); + var slot_data = indexes[slot]; + + // If the slot is empty, there can be no more items in this run. + // We didn't find a matching item, so this must be new. + // Put it in the empty slot. + if (slot_data.isEmpty()) { + const new_index = self.entries.addOneAssumeCapacity(); + indexes[slot] = .{ + .distance_from_start_index = distance_from_start_index, + .entry_index = @intCast(I, new_index), }; + + // update the hash if applicable + if (store_hash) hashes_array.ptr[new_index] = h; + return .{ .found_existing = false, - .entry = new_entry, - .index = self.entries.items.len - 1, + .key_ptr = &keys_array.ptr[new_index], + // workaround for #6974 + .value_ptr = if (@sizeOf(*V) == 0) undefined else &values_array.ptr[new_index], + .index = new_index, }; } // This pointer survives the following append because we call // entries.ensureCapacity before getOrPutInternal. - const entry = &self.entries.items[index.entry_index]; - const hash_match = if (store_hash) h == entry.hash else true; - if (hash_match and eql(key, entry.key)) { + const hash_match = if (store_hash) h == hashes_array[slot_data.entry_index] else true; + if (hash_match and checkedEql(ctx, key, keys_array[slot_data.entry_index])) { return .{ .found_existing = true, - .entry = entry, - .index = index.entry_index, + .key_ptr = &keys_array[slot_data.entry_index], + // workaround for #6974 + .value_ptr = if (@sizeOf(*V) == 0) undefined else &values_array[slot_data.entry_index], + .index = slot_data.entry_index, }; } - if (index.distance_from_start_index < distance_from_start_index) { + + // If the entry is closer to its target than our current distance, + // the entry we are looking for does not exist. It would be in + // this slot instead if it was here. So stop looking, and switch + // to insert mode. + if (slot_data.distance_from_start_index < distance_from_start_index) { // In this case, we did not find the item. We will put a new entry. // However, we will use this index for the new entry, and move - // the previous index down the line, to keep the max_distance_from_start_index + // the previous index down the line, to keep the max distance_from_start_index // as small as possible. - indexes[index_index] = .{ - .distance_from_start_index = @intCast(I, distance_from_start_index), - .entry_index = @intCast(I, self.entries.items.len), + const new_index = self.entries.addOneAssumeCapacity(); + if (store_hash) hashes_array.ptr[new_index] = h; + indexes[slot] = .{ + .entry_index = @intCast(I, new_index), + .distance_from_start_index = distance_from_start_index, }; - header.maybeBumpMax(distance_from_start_index); - const new_entry = self.entries.addOneAssumeCapacity(); - new_entry.* = .{ - .hash = if (store_hash) h else {}, - .key = key, - .value = undefined, - }; - - distance_from_start_index = index.distance_from_start_index; - var prev_entry_index = index.entry_index; + distance_from_start_index = slot_data.distance_from_start_index; + var displaced_index = slot_data.entry_index; // Find somewhere to put the index we replaced by shifting // following indexes backwards. - roll_over += 1; + index +%= 1; distance_from_start_index += 1; - while (roll_over < header.indexes_len) : ({ - roll_over += 1; + while (index != end_index) : ({ + index +%= 1; distance_from_start_index += 1; }) { - const next_index_index = header.constrainIndex(start_index + roll_over); - const next_index = indexes[next_index_index]; - if (next_index.isEmpty()) { - header.maybeBumpMax(distance_from_start_index); - indexes[next_index_index] = .{ - .entry_index = prev_entry_index, - .distance_from_start_index = @intCast(I, distance_from_start_index), + slot = header.constrainIndex(index); + slot_data = indexes[slot]; + if (slot_data.isEmpty()) { + indexes[slot] = .{ + .entry_index = displaced_index, + .distance_from_start_index = distance_from_start_index, }; return .{ .found_existing = false, - .entry = new_entry, - .index = self.entries.items.len - 1, + .key_ptr = &keys_array.ptr[new_index], + // workaround for #6974 + .value_ptr = if (@sizeOf(*V) == 0) undefined else &values_array.ptr[new_index], + .index = new_index, }; } - if (next_index.distance_from_start_index < distance_from_start_index) { - header.maybeBumpMax(distance_from_start_index); - indexes[next_index_index] = .{ - .entry_index = prev_entry_index, - .distance_from_start_index = @intCast(I, distance_from_start_index), + + if (slot_data.distance_from_start_index < distance_from_start_index) { + indexes[slot] = .{ + .entry_index = displaced_index, + .distance_from_start_index = distance_from_start_index, }; - distance_from_start_index = next_index.distance_from_start_index; - prev_entry_index = next_index.entry_index; + displaced_index = slot_data.entry_index; + distance_from_start_index = slot_data.distance_from_start_index; } } unreachable; @@ -927,61 +1497,69 @@ pub fn ArrayHashMapUnmanaged( unreachable; } - fn getInternal(self: Self, key: K, header: *IndexHeader, comptime I: type) ?usize { - const indexes = header.indexes(I); - const h = hash(key); - const start_index = header.constrainIndex(h); - var roll_over: usize = 0; - while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { - const index_index = header.constrainIndex(start_index + roll_over); - const index = indexes[index_index]; - if (index.isEmpty()) + fn getSlotByKey(self: Self, key: anytype, ctx: anytype, header: *IndexHeader, comptime I: type, indexes: []Index(I)) ?usize { + const slice = self.entries.slice(); + const hashes_array = if (store_hash) slice.items(.hash) else {}; + const keys_array = slice.items(.key); + const h = checkedHash(ctx, key); + + const start_index = safeTruncate(usize, h); + const end_index = start_index +% indexes.len; + + var index = start_index; + var distance_from_start_index: I = 0; + while (index != end_index) : ({ + index +%= 1; + distance_from_start_index += 1; + }) { + const slot = header.constrainIndex(index); + const slot_data = indexes[slot]; + if (slot_data.isEmpty() or slot_data.distance_from_start_index < distance_from_start_index) return null; - const entry = &self.entries.items[index.entry_index]; - const hash_match = if (store_hash) h == entry.hash else true; - if (hash_match and eql(key, entry.key)) - return index.entry_index; + const hash_match = if (store_hash) h == hashes_array[slot_data.entry_index] else true; + if (hash_match and checkedEql(ctx, key, keys_array[slot_data.entry_index])) + return slot; } - return null; + unreachable; } - fn insertAllEntriesIntoNewHeader(self: *Self, header: *IndexHeader) void { + fn insertAllEntriesIntoNewHeader(self: *Self, ctx: ByIndexContext, header: *IndexHeader) void { switch (header.capacityIndexType()) { - .u8 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u8), - .u16 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u16), - .u32 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u32), - .usize => return self.insertAllEntriesIntoNewHeaderGeneric(header, usize), + .u8 => return self.insertAllEntriesIntoNewHeaderGeneric(ctx, header, u8), + .u16 => return self.insertAllEntriesIntoNewHeaderGeneric(ctx, header, u16), + .u32 => return self.insertAllEntriesIntoNewHeaderGeneric(ctx, header, u32), } } - - fn insertAllEntriesIntoNewHeaderGeneric(self: *Self, header: *IndexHeader, comptime I: type) void { + fn insertAllEntriesIntoNewHeaderGeneric(self: *Self, ctx: ByIndexContext, header: *IndexHeader, comptime I: type) void { + const slice = self.entries.slice(); + const items = if (store_hash) slice.items(.hash) else slice.items(.key); const indexes = header.indexes(I); - entry_loop: for (self.entries.items) |entry, i| { - const h = if (store_hash) entry.hash else hash(entry.key); - const start_index = header.constrainIndex(h); - var entry_index = i; - var roll_over: usize = 0; - var distance_from_start_index: usize = 0; - while (roll_over < header.indexes_len) : ({ - roll_over += 1; + + entry_loop: for (items) |key, i| { + const h = if (store_hash) key else checkedHash(ctx, key); + const start_index = safeTruncate(usize, h); + const end_index = start_index +% indexes.len; + var index = start_index; + var entry_index = @intCast(I, i); + var distance_from_start_index: I = 0; + while (index != end_index) : ({ + index +%= 1; distance_from_start_index += 1; }) { - const index_index = header.constrainIndex(start_index + roll_over); - const next_index = indexes[index_index]; + const slot = header.constrainIndex(index); + const next_index = indexes[slot]; if (next_index.isEmpty()) { - header.maybeBumpMax(distance_from_start_index); - indexes[index_index] = .{ - .distance_from_start_index = @intCast(I, distance_from_start_index), - .entry_index = @intCast(I, entry_index), + indexes[slot] = .{ + .distance_from_start_index = distance_from_start_index, + .entry_index = entry_index, }; continue :entry_loop; } if (next_index.distance_from_start_index < distance_from_start_index) { - header.maybeBumpMax(distance_from_start_index); - indexes[index_index] = .{ - .distance_from_start_index = @intCast(I, distance_from_start_index), - .entry_index = @intCast(I, entry_index), + indexes[slot] = .{ + .distance_from_start_index = distance_from_start_index, + .entry_index = entry_index, }; distance_from_start_index = next_index.distance_from_start_index; entry_index = next_index.entry_index; @@ -990,98 +1568,255 @@ pub fn ArrayHashMapUnmanaged( unreachable; } } + + fn checkedHash(ctx: anytype, key: anytype) callconv(.Inline) u32 { + comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(key), K, u32); + // If you get a compile error on the next line, it means that + const hash = ctx.hash(key); // your generic hash function doesn't accept your key + if (@TypeOf(hash) != u32) { + @compileError("Context "++@typeName(@TypeOf(ctx))++" has a generic hash function that returns the wrong type!\n"++ + @typeName(u32)++" was expected, but found "++@typeName(@TypeOf(hash))); + } + return hash; + } + fn checkedEql(ctx: anytype, a: anytype, b: K) callconv(.Inline) bool { + comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(a), K, u32); + // If you get a compile error on the next line, it means that + const eql = ctx.eql(a, b); // your generic eql function doesn't accept (self, adapt key, K) + if (@TypeOf(eql) != bool) { + @compileError("Context "++@typeName(@TypeOf(ctx))++" has a generic eql function that returns the wrong type!\n"++ + @typeName(bool)++" was expected, but found "++@typeName(@TypeOf(eql))); + } + return eql; + } + + fn dumpState(self: Self, comptime keyFmt: []const u8, comptime valueFmt: []const u8) void { + if (@sizeOf(ByIndexContext) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call dumpStateContext instead."); + self.dumpStateContext(keyFmt, valueFmt, undefined); + } + fn dumpStateContext(self: Self, comptime keyFmt: []const u8, comptime valueFmt: []const u8, ctx: Context) void { + const p = std.debug.print; + p("{s}:\n", .{@typeName(Self)}); + const slice = self.entries.slice(); + const hash_status = if (store_hash) "stored" else "computed"; + p(" len={} capacity={} hashes {s}\n", .{slice.len, slice.capacity, hash_status}); + var i: usize = 0; + const mask: u32 = if (self.index_header) |header| header.mask() else ~@as(u32, 0); + while (i < slice.len) : (i += 1) { + const hash = if (store_hash) slice.items(.hash)[i] + else checkedHash(ctx, slice.items(.key)[i]); + if (store_hash) { + p( + " [{}]: key="++keyFmt++" value="++valueFmt++" hash=0x{x} slot=[0x{x}]\n", + .{i, slice.items(.key)[i], slice.items(.value)[i], hash, hash & mask}, + ); + } else { + p( + " [{}]: key="++keyFmt++" value="++valueFmt++" slot=[0x{x}]\n", + .{i, slice.items(.key)[i], slice.items(.value)[i], hash & mask}, + ); + } + } + if (self.index_header) |header| { + p("\n", .{}); + switch (header.capacityIndexType()) { + .u8 => self.dumpIndex(header, u8), + .u16 => self.dumpIndex(header, u16), + .u32 => self.dumpIndex(header, u32), + } + } + } + fn dumpIndex(self: Self, header: *IndexHeader, comptime I: type) void { + const p = std.debug.print; + p(" index len=0x{x} type={}\n", .{header.length(), header.capacityIndexType()}); + const indexes = header.indexes(I); + if (indexes.len == 0) return; + var is_empty = false; + for (indexes) |idx, i| { + if (idx.isEmpty()) { + is_empty = true; + } else { + if (is_empty) { + is_empty = false; + p(" ...\n", .{}); + } + p(" [0x{x}]: [{}] +{}\n", .{i, idx.entry_index, idx.distance_from_start_index}); + } + } + if (is_empty) { + p(" ...\n", .{}); + } + } }; } -const CapacityIndexType = enum { u8, u16, u32, usize }; +const CapacityIndexType = enum { u8, u16, u32 }; -fn capacityIndexType(indexes_len: usize) CapacityIndexType { - if (indexes_len < math.maxInt(u8)) +fn capacityIndexType(bit_index: u8) CapacityIndexType { + if (bit_index <= 8) return .u8; - if (indexes_len < math.maxInt(u16)) + if (bit_index <= 16) return .u16; - if (indexes_len < math.maxInt(u32)) - return .u32; - return .usize; + assert(bit_index <= 32); + return .u32; } -fn capacityIndexSize(indexes_len: usize) usize { - switch (capacityIndexType(indexes_len)) { +fn capacityIndexSize(bit_index: u8) usize { + switch (capacityIndexType(bit_index)) { .u8 => return @sizeOf(Index(u8)), .u16 => return @sizeOf(Index(u16)), .u32 => return @sizeOf(Index(u32)), - .usize => return @sizeOf(Index(usize)), } } +/// @truncate fails if the target type is larger than the +/// target value. This causes problems when one of the types +/// is usize, which may be larger or smaller than u32 on different +/// systems. This version of truncate is safe to use if either +/// parameter has dynamic size, and will perform widening conversion +/// when needed. Both arguments must have the same signedness. +fn safeTruncate(comptime T: type, val: anytype) T { + if (@bitSizeOf(T) >= @bitSizeOf(@TypeOf(val))) + return val; + return @truncate(T, val); +} + +/// A single entry in the lookup acceleration structure. These structs +/// are found in an array after the IndexHeader. Hashes index into this +/// array, and linear probing is used for collisions. fn Index(comptime I: type) type { return extern struct { + const Self = @This(); + + /// The index of this entry in the backing store. If the index is + /// empty, this is empty_sentinel. entry_index: I, + + /// The distance between this slot and its ideal placement. This is + /// used to keep maximum scan length small. This value is undefined + /// if the index is empty. distance_from_start_index: I, - const Self = @This(); + /// The special entry_index value marking an empty slot. + const empty_sentinel = ~@as(I, 0); + /// A constant empty index const empty = Self{ - .entry_index = math.maxInt(I), + .entry_index = empty_sentinel, .distance_from_start_index = undefined, }; + /// Checks if a slot is empty fn isEmpty(idx: Self) bool { - return idx.entry_index == math.maxInt(I); + return idx.entry_index == empty_sentinel; } + /// Sets a slot to empty fn setEmpty(idx: *Self) void { - idx.entry_index = math.maxInt(I); + idx.entry_index = empty_sentinel; + idx.distance_from_start_index = undefined; } }; } -/// This struct is trailed by an array of `Index(I)`, where `I` -/// and the array length are determined by `indexes_len`. +/// the byte size of the index must fit in a usize. This is a power of two +/// length * the size of an Index(u32). The index is 8 bytes (3 bits repr) +/// and max_usize + 1 is not representable, so we need to subtract out 4 bits. +const max_representable_index_len = @bitSizeOf(usize) - 4; +const max_bit_index = math.min(32, max_representable_index_len); +const min_bit_index = 5; +const max_capacity = (1 << max_bit_index) - 1; +const index_capacities = blk: { + var caps: [max_bit_index + 1]u32 = undefined; + for (caps[0..max_bit_index]) |*item, i| { + item.* = (1<<i) * 3 / 5; + } + caps[max_bit_index] = max_capacity; + break :blk caps; +}; + +/// This struct is trailed by two arrays of length indexes_len +/// of integers, whose integer size is determined by indexes_len. +/// These arrays are indexed by constrainIndex(hash). The +/// entryIndexes array contains the index in the dense backing store +/// where the entry's data can be found. Entries which are not in +/// use have their index value set to emptySentinel(I). +/// The entryDistances array stores the distance between an entry +/// and its ideal hash bucket. This is used when adding elements +/// to balance the maximum scan length. const IndexHeader = struct { - max_distance_from_start_index: usize, - indexes_len: usize, + /// This field tracks the total number of items in the arrays following + /// this header. It is the bit index of the power of two number of indices. + /// This value is between min_bit_index and max_bit_index, inclusive. + bit_index: u8 align(@alignOf(u32)), + /// Map from an incrementing index to an index slot in the attached arrays. fn constrainIndex(header: IndexHeader, i: usize) usize { // This is an optimization for modulo of power of two integers; // it requires `indexes_len` to always be a power of two. - return i & (header.indexes_len - 1); + return @intCast(usize, i & header.mask()); } + /// Returns the attached array of indexes. I must match the type + /// returned by capacityIndexType. fn indexes(header: *IndexHeader, comptime I: type) []Index(I) { - const start = @ptrCast([*]Index(I), @ptrCast([*]u8, header) + @sizeOf(IndexHeader)); - return start[0..header.indexes_len]; + const start_ptr = @ptrCast([*]Index(I), @ptrCast([*]u8, header) + @sizeOf(IndexHeader)); + return start_ptr[0..header.length()]; } + /// Returns the type used for the index arrays. fn capacityIndexType(header: IndexHeader) CapacityIndexType { - return hash_map.capacityIndexType(header.indexes_len); + return hash_map.capacityIndexType(header.bit_index); } - fn maybeBumpMax(header: *IndexHeader, distance_from_start_index: usize) void { - if (distance_from_start_index > header.max_distance_from_start_index) { - header.max_distance_from_start_index = distance_from_start_index; - } + fn capacity(self: IndexHeader) u32 { + return index_capacities[self.bit_index]; + } + fn length(self: IndexHeader) usize { + return @as(usize, 1) << @intCast(math.Log2Int(usize), self.bit_index); + } + fn mask(self: IndexHeader) u32 { + return @intCast(u32, self.length() - 1); + } + + fn findBitIndex(desired_capacity: usize) !u8 { + if (desired_capacity > max_capacity) return error.OutOfMemory; + var new_bit_index = @intCast(u8, std.math.log2_int_ceil(usize, desired_capacity)); + if (desired_capacity > index_capacities[new_bit_index]) new_bit_index += 1; + if (new_bit_index < min_bit_index) new_bit_index = min_bit_index; + assert(desired_capacity <= index_capacities[new_bit_index]); + return new_bit_index; } - fn alloc(allocator: *Allocator, len: usize) !*IndexHeader { - const index_size = hash_map.capacityIndexSize(len); + /// Allocates an index header, and fills the entryIndexes array with empty. + /// The distance array contents are undefined. + fn alloc(allocator: *Allocator, new_bit_index: u8) !*IndexHeader { + const len = @as(usize, 1) << @intCast(math.Log2Int(usize), new_bit_index); + const index_size = hash_map.capacityIndexSize(new_bit_index); const nbytes = @sizeOf(IndexHeader) + index_size * len; const bytes = try allocator.allocAdvanced(u8, @alignOf(IndexHeader), nbytes, .exact); @memset(bytes.ptr + @sizeOf(IndexHeader), 0xff, bytes.len - @sizeOf(IndexHeader)); const result = @ptrCast(*IndexHeader, bytes.ptr); result.* = .{ - .max_distance_from_start_index = 0, - .indexes_len = len, + .bit_index = new_bit_index, }; return result; } + /// Releases the memory for a header and its associated arrays. fn free(header: *IndexHeader, allocator: *Allocator) void { - const index_size = hash_map.capacityIndexSize(header.indexes_len); - const ptr = @ptrCast([*]u8, header); - const slice = ptr[0 .. @sizeOf(IndexHeader) + header.indexes_len * index_size]; + const index_size = hash_map.capacityIndexSize(header.bit_index); + const ptr = @ptrCast([*]align(@alignOf(IndexHeader)) u8, header); + const slice = ptr[0 .. @sizeOf(IndexHeader) + header.length() * index_size]; allocator.free(slice); } + + // Verify that the header has sufficient alignment to produce aligned arrays. + comptime { + if (@alignOf(u32) > @alignOf(IndexHeader)) + @compileError("IndexHeader must have a larger alignment than its indexes!"); + } }; test "basic hash map usage" { @@ -1099,31 +1834,32 @@ test "basic hash map usage" { const gop1 = try map.getOrPut(5); try testing.expect(gop1.found_existing == true); - try testing.expect(gop1.entry.value == 55); + try testing.expect(gop1.value_ptr.* == 55); try testing.expect(gop1.index == 4); - gop1.entry.value = 77; - try testing.expect(map.getEntry(5).?.value == 77); + gop1.value_ptr.* = 77; + try testing.expect(map.getEntry(5).?.value_ptr.* == 77); const gop2 = try map.getOrPut(99); try testing.expect(gop2.found_existing == false); try testing.expect(gop2.index == 5); - gop2.entry.value = 42; - try testing.expect(map.getEntry(99).?.value == 42); + gop2.value_ptr.* = 42; + try testing.expect(map.getEntry(99).?.value_ptr.* == 42); const gop3 = try map.getOrPutValue(5, 5); - try testing.expect(gop3.value == 77); + try testing.expect(gop3.value_ptr.* == 77); const gop4 = try map.getOrPutValue(100, 41); - try testing.expect(gop4.value == 41); + try testing.expect(gop4.value_ptr.* == 41); try testing.expect(map.contains(2)); - try testing.expect(map.getEntry(2).?.value == 22); + try testing.expect(map.getEntry(2).?.value_ptr.* == 22); try testing.expect(map.get(2).? == 22); - const rmv1 = map.swapRemove(2); + const rmv1 = map.fetchSwapRemove(2); try testing.expect(rmv1.?.key == 2); try testing.expect(rmv1.?.value == 22); - try testing.expect(map.swapRemove(2) == null); + try testing.expect(map.fetchSwapRemove(2) == null); + try testing.expect(map.swapRemove(2) == false); try testing.expect(map.getEntry(2) == null); try testing.expect(map.get(2) == null); @@ -1131,22 +1867,23 @@ test "basic hash map usage" { try testing.expect(map.getIndex(100).? == 1); const gop5 = try map.getOrPut(5); try testing.expect(gop5.found_existing == true); - try testing.expect(gop5.entry.value == 77); + try testing.expect(gop5.value_ptr.* == 77); try testing.expect(gop5.index == 4); // Whereas, if we do an `orderedRemove`, it should move the index forward one spot. - const rmv2 = map.orderedRemove(100); + const rmv2 = map.fetchOrderedRemove(100); try testing.expect(rmv2.?.key == 100); try testing.expect(rmv2.?.value == 41); - try testing.expect(map.orderedRemove(100) == null); + try testing.expect(map.fetchOrderedRemove(100) == null); + try testing.expect(map.orderedRemove(100) == false); try testing.expect(map.getEntry(100) == null); try testing.expect(map.get(100) == null); const gop6 = try map.getOrPut(5); try testing.expect(gop6.found_existing == true); - try testing.expect(gop6.entry.value == 77); + try testing.expect(gop6.value_ptr.* == 77); try testing.expect(gop6.index == 3); - map.removeAssertDiscard(3); + try testing.expect(map.swapRemove(3)); } test "iterator hash map" { @@ -1154,7 +1891,7 @@ test "iterator hash map" { defer reset_map.deinit(); // test ensureCapacity with a 0 parameter - try reset_map.ensureCapacity(0); + try reset_map.ensureTotalCapacity(0); try reset_map.putNoClobber(0, 11); try reset_map.putNoClobber(1, 22); @@ -1178,7 +1915,7 @@ test "iterator hash map" { var count: usize = 0; while (it.next()) |entry| : (count += 1) { - buffer[@intCast(usize, entry.key)] = entry.value; + buffer[@intCast(usize, entry.key_ptr.*)] = entry.value_ptr.*; } try testing.expect(count == 3); try testing.expect(it.next() == null); @@ -1190,7 +1927,7 @@ test "iterator hash map" { it.reset(); count = 0; while (it.next()) |entry| { - buffer[@intCast(usize, entry.key)] = entry.value; + buffer[@intCast(usize, entry.key_ptr.*)] = entry.value_ptr.*; count += 1; if (count >= 2) break; } @@ -1201,15 +1938,15 @@ test "iterator hash map" { it.reset(); var entry = it.next().?; - try testing.expect(entry.key == first_entry.key); - try testing.expect(entry.value == first_entry.value); + try testing.expect(entry.key_ptr.* == first_entry.key_ptr.*); + try testing.expect(entry.value_ptr.* == first_entry.value_ptr.*); } test "ensure capacity" { var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator); defer map.deinit(); - try map.ensureCapacity(20); + try map.ensureTotalCapacity(20); const initial_capacity = map.capacity(); try testing.expect(initial_capacity >= 20); var i: i32 = 0; @@ -1220,6 +1957,59 @@ test "ensure capacity" { try testing.expect(initial_capacity == map.capacity()); } +test "big map" { + var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + defer map.deinit(); + + var i: i32 = 0; + while (i < 8) : (i += 1) { + try map.put(i, i + 10); + } + + i = 0; + while (i < 8) : (i += 1) { + try testing.expectEqual(@as(?i32, i + 10), map.get(i)); + } + while (i < 16) : (i += 1) { + try testing.expectEqual(@as(?i32, null), map.get(i)); + } + + i = 4; + while (i < 12) : (i += 1) { + try map.put(i, i + 12); + } + + i = 0; + while (i < 4) : (i += 1) { + try testing.expectEqual(@as(?i32, i + 10), map.get(i)); + } + while (i < 12) : (i += 1) { + try testing.expectEqual(@as(?i32, i + 12), map.get(i)); + } + while (i < 16) : (i += 1) { + try testing.expectEqual(@as(?i32, null), map.get(i)); + } + + i = 0; + while (i < 4) : (i += 1) { + try testing.expect(map.orderedRemove(i)); + } + while (i < 8) : (i += 1) { + try testing.expect(map.swapRemove(i)); + } + + i = 0; + while (i < 8) : (i += 1) { + try testing.expectEqual(@as(?i32, null), map.get(i)); + } + while (i < 12) : (i += 1) { + try testing.expectEqual(@as(?i32, i + 12), map.get(i)); + } + while (i < 16) : (i += 1) { + try testing.expectEqual(@as(?i32, null), map.get(i)); + } +} + test "clone" { var original = AutoArrayHashMap(i32, i32).init(std.testing.allocator); defer original.deinit(); @@ -1235,7 +2025,14 @@ test "clone" { i = 0; while (i < 10) : (i += 1) { + try testing.expect(original.get(i).? == i * 10); try testing.expect(copy.get(i).? == i * 10); + try testing.expect(original.getPtr(i).? != copy.getPtr(i).?); + } + + while (i < 20) : (i += 1) { + try testing.expect(original.get(i) == null); + try testing.expect(copy.get(i) == null); } } @@ -1261,7 +2058,7 @@ test "shrink" { const gop = try map.getOrPut(i); if (i < 17) { try testing.expect(gop.found_existing == true); - try testing.expect(gop.entry.value == i * 10); + try testing.expect(gop.value_ptr.* == i * 10); } else try testing.expect(gop.found_existing == false); } @@ -1274,7 +2071,7 @@ test "shrink" { const gop = try map.getOrPut(i); if (i < 15) { try testing.expect(gop.found_existing == true); - try testing.expect(gop.entry.value == i * 10); + try testing.expect(gop.value_ptr.* == i * 10); } else try testing.expect(gop.found_existing == false); } } @@ -1298,7 +2095,7 @@ test "pop" { } test "reIndex" { - var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + var map = ArrayHashMap(i32, i32, AutoContext(i32), true).init(std.testing.allocator); defer map.deinit(); // Populate via the API. @@ -1312,13 +2109,13 @@ test "reIndex" { // Now write to the underlying array list directly. const num_unindexed_entries = 20; - const hash = getAutoHashFn(i32); + const hash = getAutoHashFn(i32, void); var al = &map.unmanaged.entries; while (i < num_indexed_entries + num_unindexed_entries) : (i += 1) { try al.append(std.testing.allocator, .{ .key = i, .value = i * 10, - .hash = {}, + .hash = hash({}, i), }); } @@ -1328,36 +2125,7 @@ test "reIndex" { while (i < num_indexed_entries + num_unindexed_entries) : (i += 1) { const gop = try map.getOrPut(i); try testing.expect(gop.found_existing == true); - try testing.expect(gop.entry.value == i * 10); - try testing.expect(gop.index == i); - } -} - -test "fromOwnedArrayList" { - const array_hash_map_type = AutoArrayHashMap(i32, i32); - var al = std.ArrayListUnmanaged(array_hash_map_type.Entry){}; - const hash = getAutoHashFn(i32); - - // Populate array list. - const num_entries = 20; - var i: i32 = 0; - while (i < num_entries) : (i += 1) { - try al.append(std.testing.allocator, .{ - .key = i, - .value = i * 10, - .hash = {}, - }); - } - - // Now instantiate using `fromOwnedArrayList`. - var map = try array_hash_map_type.fromOwnedArrayList(std.testing.allocator, al); - defer map.deinit(); - - i = 0; - while (i < num_entries) : (i += 1) { - const gop = try map.getOrPut(i); - try testing.expect(gop.found_existing == true); - try testing.expect(gop.entry.value == i * 10); + try testing.expect(gop.value_ptr.* == i * 10); try testing.expect(gop.index == i); } } @@ -1365,34 +2133,52 @@ test "fromOwnedArrayList" { test "auto store_hash" { const HasCheapEql = AutoArrayHashMap(i32, i32); const HasExpensiveEql = AutoArrayHashMap([32]i32, i32); - try testing.expect(meta.fieldInfo(HasCheapEql.Entry, .hash).field_type == void); - try testing.expect(meta.fieldInfo(HasExpensiveEql.Entry, .hash).field_type != void); + try testing.expect(meta.fieldInfo(HasCheapEql.Data, .hash).field_type == void); + try testing.expect(meta.fieldInfo(HasExpensiveEql.Data, .hash).field_type != void); const HasCheapEqlUn = AutoArrayHashMapUnmanaged(i32, i32); const HasExpensiveEqlUn = AutoArrayHashMapUnmanaged([32]i32, i32); - try testing.expect(meta.fieldInfo(HasCheapEqlUn.Entry, .hash).field_type == void); - try testing.expect(meta.fieldInfo(HasExpensiveEqlUn.Entry, .hash).field_type != void); + try testing.expect(meta.fieldInfo(HasCheapEqlUn.Data, .hash).field_type == void); + try testing.expect(meta.fieldInfo(HasExpensiveEqlUn.Data, .hash).field_type != void); +} + +test "compile everything" { + std.testing.refAllDecls(AutoArrayHashMap(i32, i32)); + std.testing.refAllDecls(StringArrayHashMap([]const u8)); + std.testing.refAllDecls(AutoArrayHashMap(i32, void)); + std.testing.refAllDecls(StringArrayHashMap(u0)); + std.testing.refAllDecls(AutoArrayHashMapUnmanaged(i32, i32)); + std.testing.refAllDecls(StringArrayHashMapUnmanaged([]const u8)); + std.testing.refAllDecls(AutoArrayHashMapUnmanaged(i32, void)); + std.testing.refAllDecls(StringArrayHashMapUnmanaged(u0)); } -pub fn getHashPtrAddrFn(comptime K: type) (fn (K) u32) { +pub fn getHashPtrAddrFn(comptime K: type, comptime Context: type) (fn (Context, K) u32) { return struct { - fn hash(key: K) u32 { - return getAutoHashFn(usize)(@ptrToInt(key)); + fn hash(ctx: Context, key: K) u32 { + return getAutoHashFn(usize, void)({}, @ptrToInt(key)); } }.hash; } -pub fn getTrivialEqlFn(comptime K: type) (fn (K, K) bool) { +pub fn getTrivialEqlFn(comptime K: type, comptime Context: type) (fn (Context, K, K) bool) { return struct { - fn eql(a: K, b: K) bool { + fn eql(ctx: Context, a: K, b: K) bool { return a == b; } }.eql; } -pub fn getAutoHashFn(comptime K: type) (fn (K) u32) { +pub fn AutoContext(comptime K: type) type { + return struct { + pub const hash = getAutoHashFn(K, @This()); + pub const eql = getAutoEqlFn(K, @This()); + }; +} + +pub fn getAutoHashFn(comptime K: type, comptime Context: type) (fn (Context, K) u32) { return struct { - fn hash(key: K) u32 { + fn hash(ctx: Context, key: K) u32 { if (comptime trait.hasUniqueRepresentation(K)) { return @truncate(u32, Wyhash.hash(0, std.mem.asBytes(&key))); } else { @@ -1404,9 +2190,9 @@ pub fn getAutoHashFn(comptime K: type) (fn (K) u32) { }.hash; } -pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) { +pub fn getAutoEqlFn(comptime K: type, comptime Context: type) (fn (Context, K, K) bool) { return struct { - fn eql(a: K, b: K) bool { + fn eql(ctx: Context, a: K, b: K) bool { return meta.eql(a, b); } }.eql; @@ -1430,9 +2216,9 @@ pub fn autoEqlIsCheap(comptime K: type) bool { }; } -pub fn getAutoHashStratFn(comptime K: type, comptime strategy: std.hash.Strategy) (fn (K) u32) { +pub fn getAutoHashStratFn(comptime K: type, comptime Context: type, comptime strategy: std.hash.Strategy) (fn (Context, K) u32) { return struct { - fn hash(key: K) u32 { + fn hash(ctx: Context, key: K) u32 { var hasher = Wyhash.init(0); std.hash.autoHashStrat(&hasher, key, strategy); return @truncate(u32, hasher.final()); diff --git a/lib/std/buf_map.zig b/lib/std/buf_map.zig index 4708b9c37b..3494fce032 100644 --- a/lib/std/buf_map.zig +++ b/lib/std/buf_map.zig @@ -16,65 +16,82 @@ pub const BufMap = struct { const BufMapHashMap = StringHashMap([]const u8); + /// Create a BufMap backed by a specific allocator. + /// That allocator will be used for both backing allocations + /// and string deduplication. pub fn init(allocator: *Allocator) BufMap { var self = BufMap{ .hash_map = BufMapHashMap.init(allocator) }; return self; } + /// Free the backing storage of the map, as well as all + /// of the stored keys and values. pub fn deinit(self: *BufMap) void { var it = self.hash_map.iterator(); - while (true) { - const entry = it.next() orelse break; - self.free(entry.key); - self.free(entry.value); + while (it.next()) |entry| { + self.free(entry.key_ptr.*); + self.free(entry.value_ptr.*); } self.hash_map.deinit(); } - /// Same as `set` but the key and value become owned by the BufMap rather + /// Same as `put` but the key and value become owned by the BufMap rather /// than being copied. - /// If `setMove` fails, the ownership of key and value does not transfer. - pub fn setMove(self: *BufMap, key: []u8, value: []u8) !void { + /// If `putMove` fails, the ownership of key and value does not transfer. + pub fn putMove(self: *BufMap, key: []u8, value: []u8) !void { const get_or_put = try self.hash_map.getOrPut(key); if (get_or_put.found_existing) { - self.free(get_or_put.entry.key); - self.free(get_or_put.entry.value); - get_or_put.entry.key = key; + self.free(get_or_put.key_ptr.*); + self.free(get_or_put.value_ptr.*); + get_or_put.key_ptr.* = key; } - get_or_put.entry.value = value; + get_or_put.value_ptr.* = value; } /// `key` and `value` are copied into the BufMap. - pub fn set(self: *BufMap, key: []const u8, value: []const u8) !void { + pub fn put(self: *BufMap, key: []const u8, value: []const u8) !void { const value_copy = try self.copy(value); errdefer self.free(value_copy); const get_or_put = try self.hash_map.getOrPut(key); if (get_or_put.found_existing) { - self.free(get_or_put.entry.value); + self.free(get_or_put.value_ptr.*); } else { - get_or_put.entry.key = self.copy(key) catch |err| { + get_or_put.key_ptr.* = self.copy(key) catch |err| { _ = self.hash_map.remove(key); return err; }; } - get_or_put.entry.value = value_copy; + get_or_put.value_ptr.* = value_copy; } + /// Find the address of the value associated with a key. + /// The returned pointer is invalidated if the map resizes. + pub fn getPtr(self: BufMap, key: []const u8) ?*[]const u8 { + return self.hash_map.getPtr(key); + } + + /// Return the map's copy of the value associated with + /// a key. The returned string is invalidated if this + /// key is removed from the map. pub fn get(self: BufMap, key: []const u8) ?[]const u8 { return self.hash_map.get(key); } - pub fn delete(self: *BufMap, key: []const u8) void { - const entry = self.hash_map.remove(key) orelse return; - self.free(entry.key); - self.free(entry.value); + /// Removes the item from the map and frees its value. + /// This invalidates the value returned by get() for this key. + pub fn remove(self: *BufMap, key: []const u8) void { + const kv = self.hash_map.fetchRemove(key) orelse return; + self.free(kv.key); + self.free(kv.value); } + /// Returns the number of KV pairs stored in the map. pub fn count(self: BufMap) usize { return self.hash_map.count(); } + /// Returns an iterator over entries in the map. pub fn iterator(self: *const BufMap) BufMapHashMap.Iterator { return self.hash_map.iterator(); } @@ -93,21 +110,21 @@ test "BufMap" { var bufmap = BufMap.init(allocator); defer bufmap.deinit(); - try bufmap.set("x", "1"); + try bufmap.put("x", "1"); try testing.expect(mem.eql(u8, bufmap.get("x").?, "1")); try testing.expect(1 == bufmap.count()); - try bufmap.set("x", "2"); + try bufmap.put("x", "2"); try testing.expect(mem.eql(u8, bufmap.get("x").?, "2")); try testing.expect(1 == bufmap.count()); - try bufmap.set("x", "3"); + try bufmap.put("x", "3"); try testing.expect(mem.eql(u8, bufmap.get("x").?, "3")); try testing.expect(1 == bufmap.count()); - bufmap.delete("x"); + bufmap.remove("x"); try testing.expect(0 == bufmap.count()); - try bufmap.setMove(try allocator.dupe(u8, "k"), try allocator.dupe(u8, "v1")); - try bufmap.setMove(try allocator.dupe(u8, "k"), try allocator.dupe(u8, "v2")); + try bufmap.putMove(try allocator.dupe(u8, "k"), try allocator.dupe(u8, "v1")); + try bufmap.putMove(try allocator.dupe(u8, "k"), try allocator.dupe(u8, "v2")); } diff --git a/lib/std/buf_set.zig b/lib/std/buf_set.zig index 5e09aa6bcb..c2a1f9bcb9 100644 --- a/lib/std/buf_set.zig +++ b/lib/std/buf_set.zig @@ -9,50 +9,69 @@ const mem = @import("mem.zig"); const Allocator = mem.Allocator; const testing = std.testing; +/// A BufSet is a set of strings. The BufSet duplicates +/// strings internally, and never takes ownership of strings +/// which are passed to it. pub const BufSet = struct { hash_map: BufSetHashMap, const BufSetHashMap = StringHashMap(void); + pub const Iterator = BufSetHashMap.KeyIterator; + /// Create a BufSet using an allocator. The allocator will + /// be used internally for both backing allocations and + /// string duplication. pub fn init(a: *Allocator) BufSet { var self = BufSet{ .hash_map = BufSetHashMap.init(a) }; return self; } + /// Free a BufSet along with all stored keys. pub fn deinit(self: *BufSet) void { - var it = self.hash_map.iterator(); - while (it.next()) |entry| { - self.free(entry.key); + var it = self.hash_map.keyIterator(); + while (it.next()) |key_ptr| { + self.free(key_ptr.*); } self.hash_map.deinit(); self.* = undefined; } - pub fn put(self: *BufSet, key: []const u8) !void { - if (self.hash_map.get(key) == null) { - const key_copy = try self.copy(key); - errdefer self.free(key_copy); - try self.hash_map.put(key_copy, {}); + /// Insert an item into the BufSet. The item will be + /// copied, so the caller may delete or reuse the + /// passed string immediately. + pub fn insert(self: *BufSet, value: []const u8) !void { + const gop = try self.hash_map.getOrPut(value); + if (!gop.found_existing) { + gop.key_ptr.* = self.copy(value) catch |err| { + _ = self.hash_map.remove(value); + return err; + }; } } - pub fn exists(self: BufSet, key: []const u8) bool { - return self.hash_map.get(key) != null; + /// Check if the set contains an item matching the passed string + pub fn contains(self: BufSet, value: []const u8) bool { + return self.hash_map.contains(value); } - pub fn delete(self: *BufSet, key: []const u8) void { - const entry = self.hash_map.remove(key) orelse return; - self.free(entry.key); + /// Remove an item from the set. + pub fn remove(self: *BufSet, value: []const u8) void { + const kv = self.hash_map.fetchRemove(value) orelse return; + self.free(kv.key); } + /// Returns the number of items stored in the set pub fn count(self: *const BufSet) usize { return self.hash_map.count(); } - pub fn iterator(self: *const BufSet) BufSetHashMap.Iterator { - return self.hash_map.iterator(); + /// Returns an iterator over the items stored in the set. + /// Iteration order is arbitrary. + pub fn iterator(self: *const BufSet) Iterator { + return self.hash_map.keyIterator(); } + /// Get the allocator used by this set pub fn allocator(self: *const BufSet) *Allocator { return self.hash_map.allocator; } @@ -72,12 +91,12 @@ test "BufSet" { var bufset = BufSet.init(std.testing.allocator); defer bufset.deinit(); - try bufset.put("x"); + try bufset.insert("x"); try testing.expect(bufset.count() == 1); - bufset.delete("x"); + bufset.remove("x"); try testing.expect(bufset.count() == 0); - try bufset.put("x"); - try bufset.put("y"); - try bufset.put("z"); + try bufset.insert("x"); + try bufset.insert("y"); + try bufset.insert("z"); } diff --git a/lib/std/build.zig b/lib/std/build.zig index 37ad0c6d17..572f2b2be9 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -504,10 +504,10 @@ pub const Builder = struct { } self.available_options_list.append(available_option) catch unreachable; - const entry = self.user_input_options.getEntry(name) orelse return null; - entry.value.used = true; + const option_ptr = self.user_input_options.getPtr(name) orelse return null; + option_ptr.used = true; switch (type_id) { - .Bool => switch (entry.value.value) { + .Bool => switch (option_ptr.value) { .Flag => return true, .Scalar => |s| { if (mem.eql(u8, s, "true")) { @@ -526,7 +526,7 @@ pub const Builder = struct { return null; }, }, - .Int => switch (entry.value.value) { + .Int => switch (option_ptr.value) { .Flag => { warn("Expected -D{s} to be an integer, but received a boolean.\n\n", .{name}); self.markInvalidUserInput(); @@ -553,7 +553,7 @@ pub const Builder = struct { return null; }, }, - .Float => switch (entry.value.value) { + .Float => switch (option_ptr.value) { .Flag => { warn("Expected -D{s} to be a float, but received a boolean.\n\n", .{name}); self.markInvalidUserInput(); @@ -573,7 +573,7 @@ pub const Builder = struct { return null; }, }, - .Enum => switch (entry.value.value) { + .Enum => switch (option_ptr.value) { .Flag => { warn("Expected -D{s} to be a string, but received a boolean.\n\n", .{name}); self.markInvalidUserInput(); @@ -594,7 +594,7 @@ pub const Builder = struct { return null; }, }, - .String => switch (entry.value.value) { + .String => switch (option_ptr.value) { .Flag => { warn("Expected -D{s} to be a string, but received a boolean.\n\n", .{name}); self.markInvalidUserInput(); @@ -607,7 +607,7 @@ pub const Builder = struct { }, .Scalar => |s| return s, }, - .List => switch (entry.value.value) { + .List => switch (option_ptr.value) { .Flag => { warn("Expected -D{s} to be a list, but received a boolean.\n\n", .{name}); self.markInvalidUserInput(); @@ -769,7 +769,7 @@ pub const Builder = struct { const value = self.dupe(value_raw); const gop = try self.user_input_options.getOrPut(name); if (!gop.found_existing) { - gop.entry.value = UserInputOption{ + gop.value_ptr.* = UserInputOption{ .name = name, .value = UserValue{ .Scalar = value }, .used = false, @@ -778,7 +778,7 @@ pub const Builder = struct { } // option already exists - switch (gop.entry.value.value) { + switch (gop.value_ptr.value) { UserValue.Scalar => |s| { // turn it into a list var list = ArrayList([]const u8).init(self.allocator); @@ -811,7 +811,7 @@ pub const Builder = struct { const name = self.dupe(name_raw); const gop = try self.user_input_options.getOrPut(name); if (!gop.found_existing) { - gop.entry.value = UserInputOption{ + gop.value_ptr.* = UserInputOption{ .name = name, .value = UserValue{ .Flag = {} }, .used = false, @@ -820,7 +820,7 @@ pub const Builder = struct { } // option already exists - switch (gop.entry.value.value) { + switch (gop.value_ptr.value) { UserValue.Scalar => |s| { warn("Flag '-D{s}' conflicts with option '-D{s}={s}'.\n", .{ name, name, s }); return true; @@ -866,10 +866,9 @@ pub const Builder = struct { pub fn validateUserInputDidItFail(self: *Builder) bool { // make sure all args are used var it = self.user_input_options.iterator(); - while (true) { - const entry = it.next() orelse break; - if (!entry.value.used) { - warn("Invalid option: -D{s}\n\n", .{entry.key}); + while (it.next()) |entry| { + if (!entry.value_ptr.used) { + warn("Invalid option: -D{s}\n\n", .{entry.key_ptr.*}); self.markInvalidUserInput(); } } @@ -1653,7 +1652,8 @@ pub const LibExeObjStep = struct { pub fn linkFramework(self: *LibExeObjStep, framework_name: []const u8) void { assert(self.target.isDarwin()); - self.frameworks.put(self.builder.dupe(framework_name)) catch unreachable; + // Note: No need to dupe because frameworks dupes internally. + self.frameworks.insert(framework_name) catch unreachable; } /// Returns whether the library, executable, or object depends on a particular system library. @@ -2155,8 +2155,8 @@ pub const LibExeObjStep = struct { // Inherit dependencies on darwin frameworks if (self.target.isDarwin() and !other.isDynamicLibrary()) { var it = other.frameworks.iterator(); - while (it.next()) |entry| { - self.frameworks.put(entry.key) catch unreachable; + while (it.next()) |framework| { + self.frameworks.insert(framework.*) catch unreachable; } } } @@ -2591,9 +2591,9 @@ pub const LibExeObjStep = struct { } var it = self.frameworks.iterator(); - while (it.next()) |entry| { + while (it.next()) |framework| { zig_args.append("-framework") catch unreachable; - zig_args.append(entry.key) catch unreachable; + zig_args.append(framework.*) catch unreachable; } } diff --git a/lib/std/build/run.zig b/lib/std/build/run.zig index 1e16813cb8..9a99e1b078 100644 --- a/lib/std/build/run.zig +++ b/lib/std/build/run.zig @@ -117,9 +117,9 @@ pub const RunStep = struct { if (prev_path) |pp| { const new_path = self.builder.fmt("{s}" ++ [1]u8{fs.path.delimiter} ++ "{s}", .{ pp, search_path }); - env_map.set(key, new_path) catch unreachable; + env_map.put(key, new_path) catch unreachable; } else { - env_map.set(key, self.builder.dupePath(search_path)) catch unreachable; + env_map.put(key, self.builder.dupePath(search_path)) catch unreachable; } } @@ -134,10 +134,8 @@ pub const RunStep = struct { pub fn setEnvironmentVariable(self: *RunStep, key: []const u8, value: []const u8) void { const env_map = self.getEnvMap(); - env_map.set( - self.builder.dupe(key), - self.builder.dupe(value), - ) catch unreachable; + // Note: no need to dupe these strings because BufMap does it internally. + env_map.put(key, value) catch unreachable; } pub fn expectStdErrEqual(self: *RunStep, bytes: []const u8) void { diff --git a/lib/std/child_process.zig b/lib/std/child_process.zig index a4ba12ef2f..56f3e9b1df 100644 --- a/lib/std/child_process.zig +++ b/lib/std/child_process.zig @@ -955,7 +955,7 @@ pub fn createWindowsEnvBlock(allocator: *mem.Allocator, env_map: *const BufMap) while (it.next()) |pair| { // +1 for '=' // +1 for null byte - max_chars_needed += pair.key.len + pair.value.len + 2; + max_chars_needed += pair.key_ptr.len + pair.value_ptr.len + 2; } break :x max_chars_needed; }; @@ -965,10 +965,10 @@ pub fn createWindowsEnvBlock(allocator: *mem.Allocator, env_map: *const BufMap) var it = env_map.iterator(); var i: usize = 0; while (it.next()) |pair| { - i += try unicode.utf8ToUtf16Le(result[i..], pair.key); + i += try unicode.utf8ToUtf16Le(result[i..], pair.key_ptr.*); result[i] = '='; i += 1; - i += try unicode.utf8ToUtf16Le(result[i..], pair.value); + i += try unicode.utf8ToUtf16Le(result[i..], pair.value_ptr.*); result[i] = 0; i += 1; } @@ -990,10 +990,10 @@ pub fn createNullDelimitedEnvMap(arena: *mem.Allocator, env_map: *const std.BufM var it = env_map.iterator(); var i: usize = 0; while (it.next()) |pair| : (i += 1) { - const env_buf = try arena.allocSentinel(u8, pair.key.len + pair.value.len + 1, 0); - mem.copy(u8, env_buf, pair.key); - env_buf[pair.key.len] = '='; - mem.copy(u8, env_buf[pair.key.len + 1 ..], pair.value); + const env_buf = try arena.allocSentinel(u8, pair.key_ptr.len + pair.value_ptr.len + 1, 0); + mem.copy(u8, env_buf, pair.key_ptr.*); + env_buf[pair.key_ptr.len] = '='; + mem.copy(u8, env_buf[pair.key_ptr.len + 1 ..], pair.value_ptr.*); envp_buf[i] = env_buf.ptr; } assert(i == envp_count); @@ -1007,11 +1007,11 @@ test "createNullDelimitedEnvMap" { var envmap = BufMap.init(allocator); defer envmap.deinit(); - try envmap.set("HOME", "/home/ifreund"); - try envmap.set("WAYLAND_DISPLAY", "wayland-1"); - try envmap.set("DISPLAY", ":1"); - try envmap.set("DEBUGINFOD_URLS", " "); - try envmap.set("XCURSOR_SIZE", "24"); + try envmap.put("HOME", "/home/ifreund"); + try envmap.put("WAYLAND_DISPLAY", "wayland-1"); + try envmap.put("DISPLAY", ":1"); + try envmap.put("DEBUGINFOD_URLS", " "); + try envmap.put("XCURSOR_SIZE", "24"); var arena = std.heap.ArenaAllocator.init(allocator); defer arena.deinit(); diff --git a/lib/std/fs/watch.zig b/lib/std/fs/watch.zig index f37bf2b51a..a9fd55da0c 100644 --- a/lib/std/fs/watch.zig +++ b/lib/std/fs/watch.zig @@ -165,11 +165,13 @@ pub fn Watch(comptime V: type) type { .macos, .freebsd, .netbsd, .dragonfly, .openbsd => { var it = self.os_data.file_table.iterator(); while (it.next()) |entry| { - entry.value.cancelled = true; + const key = entry.key_ptr.*; + const value = entry.value_ptr.*; + value.cancelled = true; // @TODO Close the fd here? - await entry.value.putter_frame; - self.allocator.free(entry.key); - self.allocator.destroy(entry.value); + await value.putter_frame; + self.allocator.free(key); + self.allocator.destroy(value); } }, .linux => { @@ -177,9 +179,9 @@ pub fn Watch(comptime V: type) type { { // Remove all directory watches linuxEventPutter will take care of // cleaning up the memory and closing the inotify fd. - var dir_it = self.os_data.wd_table.iterator(); - while (dir_it.next()) |wd_entry| { - const rc = os.linux.inotify_rm_watch(self.os_data.inotify_fd, wd_entry.key); + var dir_it = self.os_data.wd_table.keyIterator(); + while (dir_it.next()) |wd_key| { + const rc = os.linux.inotify_rm_watch(self.os_data.inotify_fd, wd_key.*); // Errno can only be EBADF, EINVAL if either the inotify fs or the wd are invalid std.debug.assert(rc == 0); } @@ -202,13 +204,13 @@ pub fn Watch(comptime V: type) type { await dir_entry.value.putter_frame; } - self.allocator.free(dir_entry.key); - var file_it = dir_entry.value.file_table.iterator(); + self.allocator.free(dir_entry.key_ptr.*); + var file_it = dir_entry.value.file_table.keyIterator(); while (file_it.next()) |file_entry| { - self.allocator.free(file_entry.key); + self.allocator.free(file_entry.*); } dir_entry.value.file_table.deinit(self.allocator); - self.allocator.destroy(dir_entry.value); + self.allocator.destroy(dir_entry.value_ptr.*); } self.os_data.dir_table.deinit(self.allocator); }, @@ -236,18 +238,18 @@ pub fn Watch(comptime V: type) type { defer held.release(); const gop = try self.os_data.file_table.getOrPut(self.allocator, realpath); - errdefer self.os_data.file_table.removeAssertDiscard(realpath); + errdefer assert(self.os_data.file_table.remove(realpath)); if (gop.found_existing) { - const prev_value = gop.entry.value.value; - gop.entry.value.value = value; + const prev_value = gop.value_ptr.value; + gop.value_ptr.value = value; return prev_value; } - gop.entry.key = try self.allocator.dupe(u8, realpath); - errdefer self.allocator.free(gop.entry.key); - gop.entry.value = try self.allocator.create(OsData.Put); - errdefer self.allocator.destroy(gop.entry.value); - gop.entry.value.* = .{ + gop.key_ptr.* = try self.allocator.dupe(u8, realpath); + errdefer self.allocator.free(gop.key_ptr.*); + gop.value_ptr.* = try self.allocator.create(OsData.Put); + errdefer self.allocator.destroy(gop.value_ptr.*); + gop.value_ptr.* = .{ .putter_frame = undefined, .value = value, }; @@ -255,7 +257,7 @@ pub fn Watch(comptime V: type) type { // @TODO Can I close this fd and get an error from bsdWaitKev? const flags = if (comptime std.Target.current.isDarwin()) os.O_SYMLINK | os.O_EVTONLY else 0; const fd = try os.open(realpath, flags, 0); - gop.entry.value.putter_frame = async self.kqPutEvents(fd, gop.entry.key, gop.entry.value); + gop.value_ptr.putter_frame = async self.kqPutEvents(fd, gop.key_ptr.*, gop.value_ptr.*); return null; } @@ -345,24 +347,24 @@ pub fn Watch(comptime V: type) type { defer held.release(); const gop = try self.os_data.wd_table.getOrPut(self.allocator, wd); - errdefer self.os_data.wd_table.removeAssertDiscard(wd); + errdefer assert(self.os_data.wd_table.remove(wd)); if (!gop.found_existing) { - gop.entry.value = OsData.Dir{ + gop.value_ptr.* = OsData.Dir{ .dirname = try self.allocator.dupe(u8, dirname), .file_table = OsData.FileTable.init(self.allocator), }; } - const dir = &gop.entry.value; + const dir = gop.value_ptr; const file_table_gop = try dir.file_table.getOrPut(self.allocator, basename); - errdefer dir.file_table.removeAssertDiscard(basename); + errdefer assert(dir.file_table.remove(basename)); if (file_table_gop.found_existing) { - const prev_value = file_table_gop.entry.value; - file_table_gop.entry.value = value; + const prev_value = file_table_gop.value_ptr.*; + file_table_gop.value_ptr.* = value; return prev_value; } else { - file_table_gop.entry.key = try self.allocator.dupe(u8, basename); - file_table_gop.entry.value = value; + file_table_gop.key_ptr.* = try self.allocator.dupe(u8, basename); + file_table_gop.value_ptr.* = value; return null; } } @@ -383,19 +385,19 @@ pub fn Watch(comptime V: type) type { defer held.release(); const gop = try self.os_data.dir_table.getOrPut(self.allocator, dirname); - errdefer self.os_data.dir_table.removeAssertDiscard(dirname); + errdefer assert(self.os_data.dir_table.remove(dirname)); if (gop.found_existing) { - const dir = gop.entry.value; + const dir = gop.value_ptr.*; const file_gop = try dir.file_table.getOrPut(self.allocator, basename); - errdefer dir.file_table.removeAssertDiscard(basename); + errdefer assert(dir.file_table.remove(basename)); if (file_gop.found_existing) { - const prev_value = file_gop.entry.value; - file_gop.entry.value = value; + const prev_value = file_gop.value_ptr.*; + file_gop.value_ptr.* = value; return prev_value; } else { - file_gop.entry.value = value; - file_gop.entry.key = try self.allocator.dupe(u8, basename); + file_gop.value_ptr.* = value; + file_gop.key_ptr.* = try self.allocator.dupe(u8, basename); return null; } } else { @@ -411,17 +413,17 @@ pub fn Watch(comptime V: type) type { const dir = try self.allocator.create(OsData.Dir); errdefer self.allocator.destroy(dir); - gop.entry.key = try self.allocator.dupe(u8, dirname); - errdefer self.allocator.free(gop.entry.key); + gop.key_ptr.* = try self.allocator.dupe(u8, dirname); + errdefer self.allocator.free(gop.key_ptr.*); dir.* = OsData.Dir{ .file_table = OsData.FileTable.init(self.allocator), .putter_frame = undefined, .dir_handle = dir_handle, }; - gop.entry.value = dir; + gop.value_ptr.* = dir; try dir.file_table.put(self.allocator, try self.allocator.dupe(u8, basename), value); - dir.putter_frame = async self.windowsDirReader(dir, gop.entry.key); + dir.putter_frame = async self.windowsDirReader(dir, gop.key_ptr.*); return null; } } @@ -501,9 +503,9 @@ pub fn Watch(comptime V: type) type { if (dir.file_table.getEntry(basename)) |entry| { self.channel.put(Event{ .id = id, - .data = entry.value, + .data = entry.value_ptr.*, .dirname = dirname, - .basename = entry.key, + .basename = entry.key_ptr.*, }); } } @@ -525,7 +527,7 @@ pub fn Watch(comptime V: type) type { defer held.release(); const dir = self.os_data.wd_table.get(dirname) orelse return null; - if (dir.file_table.remove(basename)) |file_entry| { + if (dir.file_table.fetchRemove(basename)) |file_entry| { self.allocator.free(file_entry.key); return file_entry.value; } @@ -539,7 +541,7 @@ pub fn Watch(comptime V: type) type { defer held.release(); const dir = self.os_data.dir_table.get(dirname) orelse return null; - if (dir.file_table.remove(basename)) |file_entry| { + if (dir.file_table.fetchRemove(basename)) |file_entry| { self.allocator.free(file_entry.key); return file_entry.value; } @@ -552,14 +554,14 @@ pub fn Watch(comptime V: type) type { const held = self.os_data.table_lock.acquire(); defer held.release(); - const entry = self.os_data.file_table.get(realpath) orelse return null; - entry.value.cancelled = true; + const entry = self.os_data.file_table.getEntry(realpath) orelse return null; + entry.value_ptr.cancelled = true; // @TODO Close the fd here? - await entry.value.putter_frame; - self.allocator.free(entry.key); - self.allocator.destroy(entry.value); + await entry.value_ptr.putter_frame; + self.allocator.free(entry.key_ptr.*); + self.allocator.destroy(entry.value_ptr.*); - self.os_data.file_table.removeAssertDiscard(realpath); + assert(self.os_data.file_table.remove(realpath)); }, else => @compileError("Unsupported OS"), } @@ -594,19 +596,19 @@ pub fn Watch(comptime V: type) type { if (dir.file_table.getEntry(basename)) |file_value| { self.channel.put(Event{ .id = .CloseWrite, - .data = file_value.value, + .data = file_value.value_ptr.*, .dirname = dir.dirname, - .basename = file_value.key, + .basename = file_value.key_ptr.*, }); } } else if (ev.mask & os.linux.IN_IGNORED == os.linux.IN_IGNORED) { // Directory watch was removed const held = self.os_data.table_lock.acquire(); defer held.release(); - if (self.os_data.wd_table.remove(ev.wd)) |*wd_entry| { - var file_it = wd_entry.value.file_table.iterator(); + if (self.os_data.wd_table.fetchRemove(ev.wd)) |wd_entry| { + var file_it = wd_entry.value.file_table.keyIterator(); while (file_it.next()) |file_entry| { - self.allocator.free(file_entry.key); + self.allocator.free(file_entry.*); } self.allocator.free(wd_entry.value.dirname); wd_entry.value.file_table.deinit(self.allocator); @@ -620,9 +622,9 @@ pub fn Watch(comptime V: type) type { if (dir.file_table.getEntry(basename)) |file_value| { self.channel.put(Event{ .id = .Delete, - .data = file_value.value, + .data = file_value.value_ptr.*, .dirname = dir.dirname, - .basename = file_value.key, + .basename = file_value.key_ptr.*, }); } } diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index 62f577de1a..3a3c30bdf8 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -15,7 +15,7 @@ const trait = meta.trait; const Allocator = mem.Allocator; const Wyhash = std.hash.Wyhash; -pub fn getAutoHashFn(comptime K: type) (fn (K) u64) { +pub fn getAutoHashFn(comptime K: type, comptime Context: type) (fn (Context, K) u64) { comptime { assert(@hasDecl(std, "StringHashMap")); // detect when the following message needs updated if (K == []const u8) { @@ -28,7 +28,7 @@ pub fn getAutoHashFn(comptime K: type) (fn (K) u64) { } return struct { - fn hash(key: K) u64 { + fn hash(ctx: Context, key: K) u64 { if (comptime trait.hasUniqueRepresentation(K)) { return Wyhash.hash(0, std.mem.asBytes(&key)); } else { @@ -40,31 +40,51 @@ pub fn getAutoHashFn(comptime K: type) (fn (K) u64) { }.hash; } -pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) { +pub fn getAutoEqlFn(comptime K: type, comptime Context: type) (fn (Context, K, K) bool) { return struct { - fn eql(a: K, b: K) bool { + fn eql(ctx: Context, a: K, b: K) bool { return meta.eql(a, b); } }.eql; } pub fn AutoHashMap(comptime K: type, comptime V: type) type { - return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), default_max_load_percentage); + return HashMap(K, V, AutoContext(K), default_max_load_percentage); } pub fn AutoHashMapUnmanaged(comptime K: type, comptime V: type) type { - return HashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), default_max_load_percentage); + return HashMapUnmanaged(K, V, AutoContext(K), default_max_load_percentage); +} + +pub fn AutoContext(comptime K: type) type { + return struct { + pub const hash = getAutoHashFn(K, @This()); + pub const eql = getAutoEqlFn(K, @This()); + }; } /// Builtin hashmap for strings as keys. +/// Key memory is managed by the caller. Keys and values +/// will not automatically be freed. pub fn StringHashMap(comptime V: type) type { - return HashMap([]const u8, V, hashString, eqlString, default_max_load_percentage); + return HashMap([]const u8, V, StringContext, default_max_load_percentage); } +/// Key memory is managed by the caller. Keys and values +/// will not automatically be freed. pub fn StringHashMapUnmanaged(comptime V: type) type { - return HashMapUnmanaged([]const u8, V, hashString, eqlString, default_max_load_percentage); + return HashMapUnmanaged([]const u8, V, StringContext, default_max_load_percentage); } +pub const StringContext = struct { + pub fn hash(self: @This(), s: []const u8) u64 { + return hashString(s); + } + pub fn eql(self: @This(), a: []const u8, b: []const u8) bool { + return eqlString(a, b); + } +}; + pub fn eqlString(a: []const u8, b: []const u8) bool { return mem.eql(u8, a, b); } @@ -78,6 +98,222 @@ pub const DefaultMaxLoadPercentage = default_max_load_percentage; pub const default_max_load_percentage = 80; +/// This function issues a compile error with a helpful message if there +/// is a problem with the provided context type. A context must have the following +/// member functions: +/// - hash(self, PseudoKey) Hash +/// - eql(self, PseudoKey, Key) bool +/// If you are passing a context to a *Adapted function, PseudoKey is the type +/// of the key parameter. Otherwise, when creating a HashMap or HashMapUnmanaged +/// type, PseudoKey = Key = K. +pub fn verifyContext(comptime RawContext: type, comptime PseudoKey: type, comptime Key: type, comptime Hash: type) void { + comptime { + var allow_const_ptr = false; + var allow_mutable_ptr = false; + // Context is the actual namespace type. RawContext may be a pointer to Context. + var Context = RawContext; + // Make sure the context is a namespace type which may have member functions + switch (@typeInfo(Context)) { + .Struct, .Union, .Enum => {}, + // Special-case .Opaque for a better error message + .Opaque => @compileError("Hash context must be a type with hash and eql member functions. Cannot use "++@typeName(Context)++" because it is opaque. Use a pointer instead."), + .Pointer => |ptr| { + if (ptr.size != .One) { + @compileError("Hash context must be a type with hash and eql member functions. Cannot use "++@typeName(Context)++" because it is not a single pointer."); + } + Context = ptr.child; + allow_const_ptr = true; + allow_mutable_ptr = !ptr.is_const; + switch (@typeInfo(Context)) { + .Struct, .Union, .Enum, .Opaque => {}, + else => @compileError("Hash context must be a type with hash and eql member functions. Cannot use "++@typeName(Context)), + } + }, + else => @compileError("Hash context must be a type with hash and eql member functions. Cannot use "++@typeName(Context)), + } + + // Keep track of multiple errors so we can report them all. + var errors: []const u8 = ""; + + // Put common errors here, they will only be evaluated + // if the error is actually triggered. + const lazy = struct { + const prefix = "\n "; + const deep_prefix = prefix ++ " "; + const hash_signature = "fn (self, "++@typeName(PseudoKey)++") "++@typeName(Hash); + const eql_signature = "fn (self, "++@typeName(PseudoKey)++", "++@typeName(Key)++") bool"; + const err_invalid_hash_signature = prefix ++ @typeName(Context) ++ ".hash must be " ++ hash_signature ++ + deep_prefix ++ "but is actually " ++ @typeName(@TypeOf(Context.hash)); + const err_invalid_eql_signature = prefix ++ @typeName(Context) ++ ".eql must be " ++ eql_signature ++ + deep_prefix ++ "but is actually " ++ @typeName(@TypeOf(Context.eql)); + }; + + // Verify Context.hash(self, PseudoKey) => Hash + if (@hasDecl(Context, "hash")) { + const hash = Context.hash; + const info = @typeInfo(@TypeOf(hash)); + if (info == .Fn) { + const func = info.Fn; + if (func.args.len != 2) { + errors = errors ++ lazy.err_invalid_hash_signature; + } else { + var emitted_signature = false; + if (func.args[0].arg_type) |Self| { + if (Self == Context) { + // pass, this is always fine. + } else if (Self == *const Context) { + if (!allow_const_ptr) { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_hash_signature; + emitted_signature = true; + } + errors = errors ++ lazy.deep_prefix ++ "First parameter must be "++@typeName(Context)++", but is "++@typeName(Self); + errors = errors ++ lazy.deep_prefix ++ "Note: Cannot be a pointer because it is passed by value."; + } + } else if (Self == *Context) { + if (!allow_mutable_ptr) { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_hash_signature; + emitted_signature = true; + } + if (!allow_const_ptr) { + errors = errors ++ lazy.deep_prefix ++ "First parameter must be "++@typeName(Context)++", but is "++@typeName(Self); + errors = errors ++ lazy.deep_prefix ++ "Note: Cannot be a pointer because it is passed by value."; + } else { + errors = errors ++ lazy.deep_prefix ++ "First parameter must be "++@typeName(Context)++" or "++@typeName(*const Context)++", but is "++@typeName(Self); + errors = errors ++ lazy.deep_prefix ++ "Note: Cannot be non-const because it is passed by const pointer."; + } + } + } else { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_hash_signature; + emitted_signature = true; + } + errors = errors ++ lazy.deep_prefix ++ "First parameter must be "++@typeName(Context); + if (allow_const_ptr) { + errors = errors++" or "++@typeName(*const Context); + if (allow_mutable_ptr) { + errors = errors++" or "++@typeName(*Context); + } + } + errors = errors++", but is "++@typeName(Self); + } + } + if (func.args[1].arg_type != null and func.args[1].arg_type.? != PseudoKey) { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_hash_signature; + emitted_signature = true; + } + errors = errors ++ lazy.deep_prefix ++ "Second parameter must be "++@typeName(PseudoKey)++", but is "++@typeName(func.args[1].arg_type.?); + } + if (func.return_type != null and func.return_type.? != Hash) { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_hash_signature; + emitted_signature = true; + } + errors = errors ++ lazy.deep_prefix ++ "Return type must be "++@typeName(Hash)++", but was "++@typeName(func.return_type.?); + } + // If any of these are generic (null), we cannot verify them. + // The call sites check the return type, but cannot check the + // parameters. This may cause compile errors with generic hash/eql functions. + } + } else { + errors = errors ++ lazy.err_invalid_hash_signature; + } + } else { + errors = errors ++ lazy.prefix ++ @typeName(Context) ++ " must declare a hash function with signature " ++ lazy.hash_signature; + } + + // Verify Context.eql(self, PseudoKey, Key) => bool + if (@hasDecl(Context, "eql")) { + const eql = Context.eql; + const info = @typeInfo(@TypeOf(eql)); + if (info == .Fn) { + const func = info.Fn; + if (func.args.len != 3) { + errors = errors ++ lazy.err_invalid_eql_signature; + } else { + var emitted_signature = false; + if (func.args[0].arg_type) |Self| { + if (Self == Context) { + // pass, this is always fine. + } else if (Self == *const Context) { + if (!allow_const_ptr) { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_eql_signature; + emitted_signature = true; + } + errors = errors ++ lazy.deep_prefix ++ "First parameter must be "++@typeName(Context)++", but is "++@typeName(Self); + errors = errors ++ lazy.deep_prefix ++ "Note: Cannot be a pointer because it is passed by value."; + } + } else if (Self == *Context) { + if (!allow_mutable_ptr) { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_eql_signature; + emitted_signature = true; + } + if (!allow_const_ptr) { + errors = errors ++ lazy.deep_prefix ++ "First parameter must be "++@typeName(Context)++", but is "++@typeName(Self); + errors = errors ++ lazy.deep_prefix ++ "Note: Cannot be a pointer because it is passed by value."; + } else { + errors = errors ++ lazy.deep_prefix ++ "First parameter must be "++@typeName(Context)++" or "++@typeName(*const Context)++", but is "++@typeName(Self); + errors = errors ++ lazy.deep_prefix ++ "Note: Cannot be non-const because it is passed by const pointer."; + } + } + } else { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_eql_signature; + emitted_signature = true; + } + errors = errors ++ lazy.deep_prefix ++ "First parameter must be "++@typeName(Context); + if (allow_const_ptr) { + errors = errors++" or "++@typeName(*const Context); + if (allow_mutable_ptr) { + errors = errors++" or "++@typeName(*Context); + } + } + errors = errors++", but is "++@typeName(Self); + } + } + if (func.args[1].arg_type.? != PseudoKey) { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_eql_signature; + emitted_signature = true; + } + errors = errors ++ lazy.deep_prefix ++ "Second parameter must be "++@typeName(PseudoKey)++", but is "++@typeName(func.args[1].arg_type.?); + } + if (func.args[2].arg_type.? != Key) { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_eql_signature; + emitted_signature = true; + } + errors = errors ++ lazy.deep_prefix ++ "Third parameter must be "++@typeName(Key)++", but is "++@typeName(func.args[2].arg_type.?); + } + if (func.return_type.? != bool) { + if (!emitted_signature) { + errors = errors ++ lazy.err_invalid_eql_signature; + emitted_signature = true; + } + errors = errors ++ lazy.deep_prefix ++ "Return type must be bool, but was "++@typeName(func.return_type.?); + } + // If any of these are generic (null), we cannot verify them. + // The call sites check the return type, but cannot check the + // parameters. This may cause compile errors with generic hash/eql functions. + } + } else { + errors = errors ++ lazy.err_invalid_eql_signature; + } + } else { + errors = errors ++ lazy.prefix ++ @typeName(Context) ++ " must declare a eql function with signature " ++ lazy.eql_signature; + } + + if (errors.len != 0) { + // errors begins with a newline (from lazy.prefix) + @compileError("Problems found with hash context type "++@typeName(Context)++":"++errors); + } + } +} + /// General purpose hash table. /// No order is guaranteed and any modification invalidates live iterators. /// It provides fast operations (lookup, insertion, deletion) with quite high @@ -86,83 +322,167 @@ pub const default_max_load_percentage = 80; /// field, see `HashMapUnmanaged`. /// If iterating over the table entries is a strong usecase and needs to be fast, /// prefer the alternative `std.ArrayHashMap`. +/// Context must be a struct type with two member functions: +/// hash(self, K) u64 +/// eql(self, K, K) bool +/// Adapted variants of many functions are provided. These variants +/// take a pseudo key instead of a key. Their context must have the functions: +/// hash(self, PseudoKey) u64 +/// eql(self, PseudoKey, K) bool pub fn HashMap( comptime K: type, comptime V: type, - comptime hashFn: fn (key: K) u64, - comptime eqlFn: fn (a: K, b: K) bool, + comptime Context: type, comptime max_load_percentage: u64, ) type { + comptime verifyContext(Context, K, K, u64); return struct { unmanaged: Unmanaged, allocator: *Allocator, + ctx: Context, - pub const Unmanaged = HashMapUnmanaged(K, V, hashFn, eqlFn, max_load_percentage); + /// The type of the unmanaged hash map underlying this wrapper + pub const Unmanaged = HashMapUnmanaged(K, V, Context, max_load_percentage); + /// An entry, containing pointers to a key and value stored in the map pub const Entry = Unmanaged.Entry; + /// A copy of a key and value which are no longer in the map + pub const KV = Unmanaged.KV; + /// The integer type that is the result of hashing pub const Hash = Unmanaged.Hash; + /// The iterator type returned by iterator() pub const Iterator = Unmanaged.Iterator; + + pub const KeyIterator = Unmanaged.KeyIterator; + pub const ValueIterator = Unmanaged.ValueIterator; + + /// The integer type used to store the size of the map pub const Size = Unmanaged.Size; + /// The type returned from getOrPut and variants pub const GetOrPutResult = Unmanaged.GetOrPutResult; const Self = @This(); + /// Create a managed hash map with an empty context. + /// If the context is not zero-sized, you must use + /// initContext(allocator, ctx) instead. pub fn init(allocator: *Allocator) Self { + if (@sizeOf(Context) != 0) { + @compileError("Context must be specified! Call initContext(allocator, ctx) instead."); + } + return .{ + .unmanaged = .{}, + .allocator = allocator, + .ctx = undefined, // ctx is zero-sized so this is safe. + }; + } + + /// Create a managed hash map with a context + pub fn initContext(allocator: *Allocator, ctx: Context) Self { return .{ .unmanaged = .{}, .allocator = allocator, + .ctx = ctx, }; } + /// Release the backing array and invalidate this map. + /// This does *not* deinit keys, values, or the context! + /// If your keys or values need to be released, ensure + /// that that is done before calling this function. pub fn deinit(self: *Self) void { self.unmanaged.deinit(self.allocator); self.* = undefined; } + /// Empty the map, but keep the backing allocation for future use. + /// This does *not* free keys or values! Be sure to + /// release them if they need deinitialization before + /// calling this function. pub fn clearRetainingCapacity(self: *Self) void { return self.unmanaged.clearRetainingCapacity(); } + /// Empty the map and release the backing allocation. + /// This does *not* free keys or values! Be sure to + /// release them if they need deinitialization before + /// calling this function. pub fn clearAndFree(self: *Self) void { return self.unmanaged.clearAndFree(self.allocator); } + /// Return the number of items in the map. pub fn count(self: Self) Size { return self.unmanaged.count(); } + /// Create an iterator over the entries in the map. + /// The iterator is invalidated if the map is modified. pub fn iterator(self: *const Self) Iterator { return self.unmanaged.iterator(); } + /// Create an iterator over the keys in the map. + /// The iterator is invalidated if the map is modified. + pub fn keyIterator(self: *const Self) KeyIterator { + return self.unmanaged.keyIterator(); + } + + /// Create an iterator over the values in the map. + /// The iterator is invalidated if the map is modified. + pub fn valueIterator(self: *const Self) ValueIterator { + return self.unmanaged.valueIterator(); + } + /// If key exists this function cannot fail. /// If there is an existing item with `key`, then the result - /// `Entry` pointer points to it, and found_existing is true. + /// `Entry` pointers point to it, and found_existing is true. /// Otherwise, puts a new item with undefined value, and - /// the `Entry` pointer points to it. Caller should then initialize + /// the `Entry` pointers point to it. Caller should then initialize /// the value (but not the key). pub fn getOrPut(self: *Self, key: K) !GetOrPutResult { - return self.unmanaged.getOrPut(self.allocator, key); + return self.unmanaged.getOrPutContext(self.allocator, key, self.ctx); + } + + /// If key exists this function cannot fail. + /// If there is an existing item with `key`, then the result + /// `Entry` pointers point to it, and found_existing is true. + /// Otherwise, puts a new item with undefined key and value, and + /// the `Entry` pointers point to it. Caller must then initialize + /// the key and value. + pub fn getOrPutAdapted(self: *Self, key: anytype, ctx: anytype) !GetOrPutResult { + return self.unmanaged.getOrPutContextAdapted(self.allocator, key, ctx, self.ctx); } /// If there is an existing item with `key`, then the result - /// `Entry` pointer points to it, and found_existing is true. + /// `Entry` pointers point to it, and found_existing is true. /// Otherwise, puts a new item with undefined value, and - /// the `Entry` pointer points to it. Caller should then initialize + /// the `Entry` pointers point to it. Caller should then initialize /// the value (but not the key). /// If a new entry needs to be stored, this function asserts there /// is enough capacity to store it. pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { - return self.unmanaged.getOrPutAssumeCapacity(key); + return self.unmanaged.getOrPutAssumeCapacityContext(key, self.ctx); + } + + /// If there is an existing item with `key`, then the result + /// `Entry` pointers point to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointers point to it. Caller must then initialize + /// the key and value. + /// If a new entry needs to be stored, this function asserts there + /// is enough capacity to store it. + pub fn getOrPutAssumeCapacityAdapted(self: *Self, key: anytype, ctx: anytype) GetOrPutResult { + return self.unmanaged.getOrPutAssumeCapacityAdapted(self.allocator, key, ctx); } - pub fn getOrPutValue(self: *Self, key: K, value: V) !*Entry { - return self.unmanaged.getOrPutValue(self.allocator, key, value); + pub fn getOrPutValue(self: *Self, key: K, value: V) !Entry { + return self.unmanaged.getOrPutValueContext(self.allocator, key, value, self.ctx); } /// Increases capacity, guaranteeing that insertions up until the /// `expected_count` will not cause an allocation, and therefore cannot fail. pub fn ensureCapacity(self: *Self, expected_count: Size) !void { - return self.unmanaged.ensureCapacity(self.allocator, expected_count); + return self.unmanaged.ensureCapacityContext(self.allocator, expected_count, self.ctx); } /// Returns the number of total elements which may be present before it is @@ -174,67 +494,114 @@ pub fn HashMap( /// Clobbers any existing data. To detect if a put would clobber /// existing data, see `getOrPut`. pub fn put(self: *Self, key: K, value: V) !void { - return self.unmanaged.put(self.allocator, key, value); + return self.unmanaged.putContext(self.allocator, key, value, self.ctx); } /// Inserts a key-value pair into the hash map, asserting that no previous /// entry with the same key is already present pub fn putNoClobber(self: *Self, key: K, value: V) !void { - return self.unmanaged.putNoClobber(self.allocator, key, value); + return self.unmanaged.putNoClobberContext(self.allocator, key, value, self.ctx); } /// Asserts there is enough capacity to store the new key-value pair. /// Clobbers any existing data. To detect if a put would clobber /// existing data, see `getOrPutAssumeCapacity`. pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { - return self.unmanaged.putAssumeCapacity(key, value); + return self.unmanaged.putAssumeCapacityContext(key, value, self.ctx); } /// Asserts there is enough capacity to store the new key-value pair. /// Asserts that it does not clobber any existing data. /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`. pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { - return self.unmanaged.putAssumeCapacityNoClobber(key, value); + return self.unmanaged.putAssumeCapacityNoClobberContext(key, value, self.ctx); } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. - pub fn fetchPut(self: *Self, key: K, value: V) !?Entry { - return self.unmanaged.fetchPut(self.allocator, key, value); + pub fn fetchPut(self: *Self, key: K, value: V) !?KV { + return self.unmanaged.fetchPutContext(self.allocator, key, value, self.ctx); } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. /// If insertion happuns, asserts there is enough capacity without allocating. - pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { - return self.unmanaged.fetchPutAssumeCapacity(key, value); + pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?KV { + return self.unmanaged.fetchPutAssumeCapacityContext(key, value, self.ctx); } + /// Removes a value from the map and returns the removed kv pair. + pub fn fetchRemove(self: *Self, key: K) ?KV { + return self.unmanaged.fetchRemoveContext(key, self.ctx); + } + + pub fn fetchRemoveAdapted(self: *Self, key: anytype, ctx: anytype) ?KV { + return self.unmanaged.fetchRemoveAdapted(key, ctx); + } + + /// Finds the value associated with a key in the map pub fn get(self: Self, key: K) ?V { - return self.unmanaged.get(key); + return self.unmanaged.getContext(key, self.ctx); + } + pub fn getAdapted(self: Self, key: anytype, ctx: anytype) ?V { + return self.unmanaged.getAdapted(key, ctx); + } + + pub fn getPtr(self: Self, key: K) ?*V { + return self.unmanaged.getPtrContext(key, self.ctx); + } + pub fn getPtrAdapted(self: Self, key: anytype, ctx: anytype) ?*V { + return self.unmanaged.getPtrAdapted(key, self.ctx); + } + + /// Finds the key and value associated with a key in the map + pub fn getEntry(self: Self, key: K) ?Entry { + return self.unmanaged.getEntryContext(key, self.ctx); } - pub fn getEntry(self: Self, key: K) ?*Entry { - return self.unmanaged.getEntry(key); + pub fn getEntryAdapted(self: Self, key: anytype, ctx: anytype) ?Entry { + return self.unmanaged.getEntryAdapted(key, ctx); } + /// Check if the map contains a key pub fn contains(self: Self, key: K) bool { - return self.unmanaged.contains(key); + return self.unmanaged.containsContext(key, self.ctx); + } + + pub fn containsAdapted(self: Self, key: anytype, ctx: anytype) bool { + return self.unmanaged.containsAdapted(key, ctx); } /// If there is an `Entry` with a matching key, it is deleted from /// the hash map, and then returned from this function. - pub fn remove(self: *Self, key: K) ?Entry { - return self.unmanaged.remove(key); + pub fn remove(self: *Self, key: K) bool { + return self.unmanaged.removeContext(key, self.ctx); } - /// Asserts there is an `Entry` with matching key, deletes it from the hash map, - /// and discards it. - pub fn removeAssertDiscard(self: *Self, key: K) void { - return self.unmanaged.removeAssertDiscard(key); + pub fn removeAdapted(self: *Self, key: anytype, ctx: anytype) bool { + return self.unmanaged.removeAdapted(key, ctx); } + /// Creates a copy of this map, using the same allocator pub fn clone(self: Self) !Self { - var other = try self.unmanaged.clone(self.allocator); - return other.promote(self.allocator); + var other = try self.unmanaged.cloneContext(self.allocator, self.ctx); + return other.promoteContext(self.allocator, self.ctx); + } + + /// Creates a copy of this map, using a specified allocator + pub fn cloneWithAllocator(self: Self, new_allocator: *Allocator) !Self { + var other = try self.unmanaged.cloneContext(new_allocator, self.ctx); + return other.promoteContext(new_allocator, self.ctx); + } + + /// Creates a copy of this map, using a specified context + pub fn cloneWithContext(self: Self, new_ctx: anytype) !HashMap(K, V, @TypeOf(new_ctx), max_load_percentage) { + var other = try self.unmanaged.cloneContext(self.allocator, new_ctx); + return other.promoteContext(self.allocator, new_ctx); + } + + /// Creates a copy of this map, using a specified allocator and context + pub fn cloneWithAllocatorAndContext(new_allocator: *Allocator, new_ctx: anytype) !HashMap(K, V, @TypeOf(new_ctx), max_load_percentage) { + var other = try self.unmanaged.cloneContext(new_allocator, new_ctx); + return other.promoteContext(new_allocator, new_ctx); } }; } @@ -251,11 +618,12 @@ pub fn HashMap( pub fn HashMapUnmanaged( comptime K: type, comptime V: type, - hashFn: fn (key: K) u64, - eqlFn: fn (a: K, b: K) bool, + comptime Context: type, comptime max_load_percentage: u64, ) type { - comptime assert(max_load_percentage > 0 and max_load_percentage < 100); + if (max_load_percentage <= 0 or max_load_percentage >= 100) + @compileError("max_load_percentage must be between 0 and 100."); + comptime verifyContext(Context, K, K, u64); return struct { const Self = @This(); @@ -284,19 +652,25 @@ pub fn HashMapUnmanaged( const minimal_capacity = 8; // This hashmap is specially designed for sizes that fit in a u32. - const Size = u32; + pub const Size = u32; // u64 hashes guarantee us that the fingerprint bits will never be used // to compute the index of a slot, maximizing the use of entropy. - const Hash = u64; + pub const Hash = u64; pub const Entry = struct { + key_ptr: *K, + value_ptr: *V, + }; + + pub const KV = struct { key: K, value: V, }; const Header = packed struct { - entries: [*]Entry, + values: [*]V, + keys: [*]K, capacity: Size, }; @@ -353,11 +727,11 @@ pub fn HashMapUnmanaged( assert(@alignOf(Metadata) == 1); } - const Iterator = struct { + pub const Iterator = struct { hm: *const Self, index: Size = 0, - pub fn next(it: *Iterator) ?*Entry { + pub fn next(it: *Iterator) ?Entry { assert(it.index <= it.hm.capacity()); if (it.hm.size == 0) return null; @@ -370,9 +744,10 @@ pub fn HashMapUnmanaged( it.index += 1; }) { if (metadata[0].isUsed()) { - const entry = &it.hm.entries()[it.index]; + const key = &it.hm.keys()[it.index]; + const value = &it.hm.values()[it.index]; it.index += 1; - return entry; + return Entry{ .key_ptr = key, .value_ptr = value }; } } @@ -380,17 +755,50 @@ pub fn HashMapUnmanaged( } }; + pub const KeyIterator = FieldIterator(K); + pub const ValueIterator = FieldIterator(V); + + fn FieldIterator(comptime T: type) type { + return struct { + len: usize, + metadata: [*]const Metadata, + items: [*]T, + + pub fn next(self: *@This()) ?*T { + while (self.len > 0) { + self.len -= 1; + const used = self.metadata[0].isUsed(); + const item = &self.items[0]; + self.metadata += 1; + self.items += 1; + if (used) { + return item; + } + } + return null; + } + }; + } + pub const GetOrPutResult = struct { - entry: *Entry, + key_ptr: *K, + value_ptr: *V, found_existing: bool, }; - pub const Managed = HashMap(K, V, hashFn, eqlFn, max_load_percentage); + pub const Managed = HashMap(K, V, Context, max_load_percentage); pub fn promote(self: Self, allocator: *Allocator) Managed { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call promoteContext instead."); + return promoteContext(self, allocator, undefined); + } + + pub fn promoteContext(self: Self, allocator: *Allocator, ctx: Context) Managed { return .{ .unmanaged = self, .allocator = allocator, + .ctx = ctx, }; } @@ -403,26 +811,6 @@ pub fn HashMapUnmanaged( self.* = undefined; } - fn deallocate(self: *Self, allocator: *Allocator) void { - if (self.metadata == null) return; - - const cap = self.capacity(); - const meta_size = @sizeOf(Header) + cap * @sizeOf(Metadata); - - const alignment = @alignOf(Entry) - 1; - const entries_size = @as(usize, cap) * @sizeOf(Entry) + alignment; - - const total_size = meta_size + entries_size; - - var slice: []u8 = undefined; - slice.ptr = @intToPtr([*]u8, @ptrToInt(self.header())); - slice.len = total_size; - allocator.free(slice); - - self.metadata = null; - self.available = 0; - } - fn capacityForSize(size: Size) Size { var new_cap = @truncate(u32, (@as(u64, size) * 100) / max_load_percentage + 1); new_cap = math.ceilPowerOfTwo(u32, new_cap) catch unreachable; @@ -430,8 +818,13 @@ pub fn HashMapUnmanaged( } pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_size: Size) !void { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call ensureCapacityContext instead."); + return ensureCapacityContext(self, allocator, new_size, undefined); + } + pub fn ensureCapacityContext(self: *Self, allocator: *Allocator, new_size: Size, ctx: Context) !void { if (new_size > self.size) - try self.growIfNeeded(allocator, new_size - self.size); + try self.growIfNeeded(allocator, new_size - self.size, ctx); } pub fn clearRetainingCapacity(self: *Self) void { @@ -456,8 +849,12 @@ pub fn HashMapUnmanaged( return @ptrCast(*Header, @ptrCast([*]Header, self.metadata.?) - 1); } - fn entries(self: *const Self) [*]Entry { - return self.header().entries; + fn keys(self: *const Self) [*]K { + return self.header().keys; + } + + fn values(self: *const Self) [*]V { + return self.header().values; } pub fn capacity(self: *const Self) Size { @@ -470,28 +867,75 @@ pub fn HashMapUnmanaged( return .{ .hm = self }; } + pub fn keyIterator(self: *const Self) KeyIterator { + if (self.metadata) |metadata| { + return .{ + .len = self.capacity(), + .metadata = metadata, + .items = self.keys(), + }; + } else { + return .{ + .len = 0, + .metadata = undefined, + .items = undefined, + }; + } + } + + pub fn valueIterator(self: *const Self) ValueIterator { + if (self.metadata) |metadata| { + return .{ + .len = self.capacity(), + .metadata = metadata, + .items = self.values(), + }; + } else { + return .{ + .len = 0, + .metadata = undefined, + .items = undefined, + }; + } + } + /// Insert an entry in the map. Assumes it is not already present. pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void { - assert(!self.contains(key)); - try self.growIfNeeded(allocator, 1); + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call putNoClobberContext instead."); + return self.putNoClobberContext(allocator, key, value, undefined); + } + pub fn putNoClobberContext(self: *Self, allocator: *Allocator, key: K, value: V, ctx: Context) !void { + assert(!self.containsContext(key, ctx)); + try self.growIfNeeded(allocator, 1, ctx); - self.putAssumeCapacityNoClobber(key, value); + self.putAssumeCapacityNoClobberContext(key, value, ctx); } /// Asserts there is enough capacity to store the new key-value pair. /// Clobbers any existing data. To detect if a put would clobber /// existing data, see `getOrPutAssumeCapacity`. pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { - const gop = self.getOrPutAssumeCapacity(key); - gop.entry.value = value; + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call putAssumeCapacityContext instead."); + return self.putAssumeCapacityContext(key, value, undefined); + } + pub fn putAssumeCapacityContext(self: *Self, key: K, value: V, ctx: Context) void { + const gop = self.getOrPutAssumeCapacityContext(key, ctx); + gop.value_ptr.* = value; } /// Insert an entry in the map. Assumes it is not already present, /// and that no allocation is needed. pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { - assert(!self.contains(key)); + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call putAssumeCapacityNoClobberContext instead."); + return self.putAssumeCapacityNoClobberContext(key, value, undefined); + } + pub fn putAssumeCapacityNoClobberContext(self: *Self, key: K, value: V, ctx: Context) void { + assert(!self.containsContext(key, ctx)); - const hash = hashFn(key); + const hash = ctx.hash(key); const mask = self.capacity() - 1; var idx = @truncate(usize, hash & mask); @@ -508,40 +952,102 @@ pub fn HashMapUnmanaged( const fingerprint = Metadata.takeFingerprint(hash); metadata[0].fill(fingerprint); - self.entries()[idx] = Entry{ .key = key, .value = value }; + self.keys()[idx] = key; + self.values()[idx] = value; self.size += 1; } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. - pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?Entry { - const gop = try self.getOrPut(allocator, key); - var result: ?Entry = null; + pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?KV { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call fetchPutContext instead."); + return self.fetchPutContext(allocator, key, value, undefined); + } + pub fn fetchPutContext(self: *Self, allocator: *Allocator, key: K, value: V, ctx: Context) !?KV { + const gop = try self.getOrPutContext(allocator, key, ctx); + var result: ?KV = null; if (gop.found_existing) { - result = gop.entry.*; + result = KV{ + .key = gop.key_ptr.*, + .value = gop.value_ptr.*, + }; } - gop.entry.value = value; + gop.value_ptr.* = value; return result; } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. /// If insertion happens, asserts there is enough capacity without allocating. - pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { - const gop = self.getOrPutAssumeCapacity(key); - var result: ?Entry = null; + pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?KV { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call fetchPutAssumeCapacityContext instead."); + return self.fetchPutAssumeCapacityContext(key, value, undefined); + } + pub fn fetchPutAssumeCapacityContext(self: *Self, key: K, value: V, ctx: Context) ?KV { + const gop = self.getOrPutAssumeCapacityContext(key, ctx); + var result: ?KV = null; if (gop.found_existing) { - result = gop.entry.*; + result = KV{ + .key = gop.key_ptr.*, + .value = gop.value_ptr.*, + }; } - gop.entry.value = value; + gop.value_ptr.* = value; return result; } - pub fn getEntry(self: Self, key: K) ?*Entry { + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map, and then returned from this function. + pub fn fetchRemove(self: *Self, key: K) ?KV { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call fetchRemoveContext instead."); + return self.fetchRemoveContext(key, undefined); + } + pub fn fetchRemoveContext(self: *Self, key: K, ctx: Context) ?KV { + return self.fetchRemoveAdapted(key, ctx); + } + pub fn fetchRemoveAdapted(self: *Self, key: anytype, ctx: anytype) ?KV { + if (self.getIndex(key, ctx)) |idx| { + const old_key = &self.keys()[idx]; + const old_val = &self.values()[idx]; + const result = KV{ + .key = old_key.*, + .value = old_val.*, + }; + self.metadata.?[idx].remove(); + old_key.* = undefined; + old_val.* = undefined; + self.size -= 1; + return result; + } + + return null; + } + + /// Find the index containing the data for the given key. + /// Whether this function returns null is almost always + /// branched on after this function returns, and this function + /// returns null/not null from separate code paths. We + /// want the optimizer to remove that branch and instead directly + /// fuse the basic blocks after the branch to the basic blocks + /// from this function. To encourage that, this function is + /// marked as inline. + fn getIndex(self: Self, key: anytype, ctx: anytype) callconv(.Inline) ?usize { + comptime verifyContext(@TypeOf(ctx), @TypeOf(key), K, Hash); + if (self.size == 0) { return null; } - const hash = hashFn(key); + // If you get a compile error on this line, it means that your generic hash + // function is invalid for these parameters. + const hash = ctx.hash(key); + // verifyContext can't verify the return type of generic hash functions, + // so we need to double-check it here. + if (@TypeOf(hash) != Hash) { + @compileError("Context "++@typeName(@TypeOf(ctx))++" has a generic hash function that returns the wrong type! "++@typeName(Hash)++" was expected, but found "++@typeName(@TypeOf(hash))); + } const mask = self.capacity() - 1; const fingerprint = Metadata.takeFingerprint(hash); var idx = @truncate(usize, hash & mask); @@ -549,11 +1055,20 @@ pub fn HashMapUnmanaged( var metadata = self.metadata.? + idx; while (metadata[0].isUsed() or metadata[0].isTombstone()) { if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { - const entry = &self.entries()[idx]; - if (eqlFn(entry.key, key)) { - return entry; + const test_key = &self.keys()[idx]; + // If you get a compile error on this line, it means that your generic eql + // function is invalid for these parameters. + const eql = ctx.eql(key, test_key.*); + // verifyContext can't verify the return type of generic eql functions, + // so we need to double-check it here. + if (@TypeOf(eql) != bool) { + @compileError("Context "++@typeName(@TypeOf(ctx))++" has a generic eql function that returns the wrong type! bool was expected, but found "++@typeName(@TypeOf(eql))); + } + if (eql) { + return idx; } } + idx = (idx + 1) & mask; metadata = self.metadata.? + idx; } @@ -561,46 +1076,122 @@ pub fn HashMapUnmanaged( return null; } + pub fn getEntry(self: Self, key: K) ?Entry { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getEntryContext instead."); + return self.getEntryContext(key, undefined); + } + pub fn getEntryContext(self: Self, key: K, ctx: Context) ?Entry { + return self.getEntryAdapted(key, ctx); + } + pub fn getEntryAdapted(self: Self, key: anytype, ctx: anytype) ?Entry { + if (self.getIndex(key, ctx)) |idx| { + return Entry{ + .key_ptr = &self.keys()[idx], + .value_ptr = &self.values()[idx], + }; + } + return null; + } + /// Insert an entry if the associated key is not already present, otherwise update preexisting value. pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void { - const result = try self.getOrPut(allocator, key); - result.entry.value = value; + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call putContext instead."); + return self.putContext(allocator, key, value, undefined); + } + pub fn putContext(self: *Self, allocator: *Allocator, key: K, value: V, ctx: Context) !void { + const result = try self.getOrPutContext(allocator, key, ctx); + result.value_ptr.* = value; } /// Get an optional pointer to the value associated with key, if present. - pub fn get(self: Self, key: K) ?V { - if (self.size == 0) { - return null; + pub fn getPtr(self: Self, key: K) ?*V { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getPtrContext instead."); + return self.getPtrContext(key, undefined); + } + pub fn getPtrContext(self: Self, key: K, ctx: Context) ?*V { + return self.getPtrAdapted(key, ctx); + } + pub fn getPtrAdapted(self: Self, key: anytype, ctx: anytype) ?*V { + if (self.getIndex(key, ctx)) |idx| { + return &self.values()[idx]; } + return null; + } - const hash = hashFn(key); - const mask = self.capacity() - 1; - const fingerprint = Metadata.takeFingerprint(hash); - var idx = @truncate(usize, hash & mask); - - var metadata = self.metadata.? + idx; - while (metadata[0].isUsed() or metadata[0].isTombstone()) { - if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { - const entry = &self.entries()[idx]; - if (eqlFn(entry.key, key)) { - return entry.value; - } - } - idx = (idx + 1) & mask; - metadata = self.metadata.? + idx; + /// Get a copy of the value associated with key, if present. + pub fn get(self: Self, key: K) ?V { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getContext instead."); + return self.getContext(key, undefined); + } + pub fn getContext(self: Self, key: K, ctx: Context) ?V { + return self.getAdapted(key, ctx); + } + pub fn getAdapted(self: Self, key: anytype, ctx: anytype) ?V { + if (self.getIndex(key, ctx)) |idx| { + return self.values()[idx]; } - return null; } pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult { - try self.growIfNeeded(allocator, 1); - - return self.getOrPutAssumeCapacity(key); + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getOrPutContext instead."); + return self.getOrPutContext(allocator, key, undefined); + } + pub fn getOrPutContext(self: *Self, allocator: *Allocator, key: K, ctx: Context) !GetOrPutResult { + const gop = try self.getOrPutContextAdapted(allocator, key, ctx, ctx); + if (!gop.found_existing) { + gop.key_ptr.* = key; + } + return gop; + } + pub fn getOrPutAdapted(self: *Self, allocator: *Allocator, key: anytype, key_ctx: anytype) !GetOrPutResult { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getOrPutContextAdapted instead."); + return self.getOrPutContextAdapted(allocator, key, key_ctx, undefined); + } + pub fn getOrPutContextAdapted(self: *Self, allocator: *Allocator, key: anytype, key_ctx: anytype, ctx: Context) !GetOrPutResult { + self.growIfNeeded(allocator, 1, ctx) catch |err| { + // If allocation fails, try to do the lookup anyway. + // If we find an existing item, we can return it. + // Otherwise return the error, we could not add another. + const index = self.getIndex(key, key_ctx) orelse return err; + return GetOrPutResult{ + .key_ptr = &self.keys()[index], + .value_ptr = &self.values()[index], + .found_existing = true, + }; + }; + return self.getOrPutAssumeCapacityAdapted(key, key_ctx); } pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { - const hash = hashFn(key); + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getOrPutAssumeCapacityContext instead."); + return self.getOrPutAssumeCapacityContext(key, undefined); + } + pub fn getOrPutAssumeCapacityContext(self: *Self, key: K, ctx: Context) GetOrPutResult { + const result = self.getOrPutAssumeCapacityAdapted(key, ctx); + if (!result.found_existing) { + result.key_ptr.* = key; + } + return result; + } + pub fn getOrPutAssumeCapacityAdapted(self: *Self, key: anytype, ctx: anytype) GetOrPutResult { + comptime verifyContext(@TypeOf(ctx), @TypeOf(key), K, Hash); + + // If you get a compile error on this line, it means that your generic hash + // function is invalid for these parameters. + const hash = ctx.hash(key); + // verifyContext can't verify the return type of generic hash functions, + // so we need to double-check it here. + if (@TypeOf(hash) != Hash) { + @compileError("Context "++@typeName(@TypeOf(ctx))++" has a generic hash function that returns the wrong type! "++@typeName(Hash)++" was expected, but found "++@typeName(@TypeOf(hash))); + } const mask = self.capacity() - 1; const fingerprint = Metadata.takeFingerprint(hash); var idx = @truncate(usize, hash & mask); @@ -609,9 +1200,21 @@ pub fn HashMapUnmanaged( var metadata = self.metadata.? + idx; while (metadata[0].isUsed() or metadata[0].isTombstone()) { if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { - const entry = &self.entries()[idx]; - if (eqlFn(entry.key, key)) { - return GetOrPutResult{ .entry = entry, .found_existing = true }; + const test_key = &self.keys()[idx]; + // If you get a compile error on this line, it means that your generic eql + // function is invalid for these parameters. + const eql = ctx.eql(key, test_key.*); + // verifyContext can't verify the return type of generic eql functions, + // so we need to double-check it here. + if (@TypeOf(eql) != bool) { + @compileError("Context "++@typeName(@TypeOf(ctx))++" has a generic eql function that returns the wrong type! bool was expected, but found "++@typeName(@TypeOf(eql))); + } + if (eql) { + return GetOrPutResult{ + .key_ptr = test_key, + .value_ptr = &self.values()[idx], + .found_existing = true, + }; } } else if (first_tombstone_idx == self.capacity() and metadata[0].isTombstone()) { first_tombstone_idx = idx; @@ -631,79 +1234,67 @@ pub fn HashMapUnmanaged( } metadata[0].fill(fingerprint); - const entry = &self.entries()[idx]; - entry.* = .{ .key = key, .value = undefined }; + const new_key = &self.keys()[idx]; + const new_value = &self.values()[idx]; + new_key.* = key; + new_value.* = undefined; self.size += 1; - return GetOrPutResult{ .entry = entry, .found_existing = false }; + return GetOrPutResult{ + .key_ptr = new_key, + .value_ptr = new_value, + .found_existing = false, + }; } - pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry { - const res = try self.getOrPut(allocator, key); - if (!res.found_existing) res.entry.value = value; - return res.entry; + pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !Entry { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call getOrPutValueContext instead."); + return self.getOrPutValueContext(allocator, key, value, undefined); + } + pub fn getOrPutValueContext(self: *Self, allocator: *Allocator, key: K, value: V, ctx: Context) !Entry { + const res = try self.getOrPutAdapted(allocator, key, ctx); + if (!res.found_existing) { + res.key_ptr.* = key; + res.value_ptr.* = value; + } + return Entry{ .key_ptr = res.key_ptr, .value_ptr = res.value_ptr }; } /// Return true if there is a value associated with key in the map. pub fn contains(self: *const Self, key: K) bool { - return self.get(key) != null; + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call containsContext instead."); + return self.containsContext(key, undefined); } - - /// If there is an `Entry` with a matching key, it is deleted from - /// the hash map, and then returned from this function. - pub fn remove(self: *Self, key: K) ?Entry { - if (self.size == 0) return null; - - const hash = hashFn(key); - const mask = self.capacity() - 1; - const fingerprint = Metadata.takeFingerprint(hash); - var idx = @truncate(usize, hash & mask); - - var metadata = self.metadata.? + idx; - while (metadata[0].isUsed() or metadata[0].isTombstone()) { - if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { - const entry = &self.entries()[idx]; - if (eqlFn(entry.key, key)) { - const removed_entry = entry.*; - metadata[0].remove(); - entry.* = undefined; - self.size -= 1; - return removed_entry; - } - } - idx = (idx + 1) & mask; - metadata = self.metadata.? + idx; - } - - return null; + pub fn containsContext(self: *const Self, key: K, ctx: Context) bool { + return self.containsAdapted(key, ctx); + } + pub fn containsAdapted(self: *const Self, key: anytype, ctx: anytype) bool { + return self.getIndex(key, ctx) != null; } - /// Asserts there is an `Entry` with matching key, deletes it from the hash map, - /// and discards it. - pub fn removeAssertDiscard(self: *Self, key: K) void { - assert(self.contains(key)); - - const hash = hashFn(key); - const mask = self.capacity() - 1; - const fingerprint = Metadata.takeFingerprint(hash); - var idx = @truncate(usize, hash & mask); - - var metadata = self.metadata.? + idx; - while (metadata[0].isUsed() or metadata[0].isTombstone()) { - if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { - const entry = &self.entries()[idx]; - if (eqlFn(entry.key, key)) { - metadata[0].remove(); - entry.* = undefined; - self.size -= 1; - return; - } - } - idx = (idx + 1) & mask; - metadata = self.metadata.? + idx; + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map, and this function returns true. Otherwise this + /// function returns false. + pub fn remove(self: *Self, key: K) bool { + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call removeContext instead."); + return self.removeContext(key, undefined); + } + pub fn removeContext(self: *Self, key: K, ctx: Context) bool { + return self.removeAdapted(key, ctx); + } + pub fn removeAdapted(self: *Self, key: anytype, ctx: anytype) bool { + if (self.getIndex(key, ctx)) |idx| { + self.metadata.?[idx].remove(); + self.keys()[idx] = undefined; + self.values()[idx] = undefined; + self.size -= 1; + return true; } - unreachable; + return false; } fn initMetadatas(self: *Self) void { @@ -718,14 +1309,19 @@ pub fn HashMapUnmanaged( return @truncate(Size, max_load - self.available); } - fn growIfNeeded(self: *Self, allocator: *Allocator, new_count: Size) !void { + fn growIfNeeded(self: *Self, allocator: *Allocator, new_count: Size, ctx: Context) !void { if (new_count > self.available) { - try self.grow(allocator, capacityForSize(self.load() + new_count)); + try self.grow(allocator, capacityForSize(self.load() + new_count), ctx); } } pub fn clone(self: Self, allocator: *Allocator) !Self { - var other = Self{}; + if (@sizeOf(Context) != 0) + @compileError("Cannot infer context "++@typeName(Context)++", call cloneContext instead."); + return self.cloneContext(allocator, @as(Context, undefined)); + } + pub fn cloneContext(self: Self, allocator: *Allocator, new_ctx: anytype) !HashMapUnmanaged(K, V, @TypeOf(new_ctx), max_load_percentage) { + var other = HashMapUnmanaged(K, V, @TypeOf(new_ctx), max_load_percentage){}; if (self.size == 0) return other; @@ -736,11 +1332,11 @@ pub fn HashMapUnmanaged( var i: Size = 0; var metadata = self.metadata.?; - var entr = self.entries(); + var keys_ptr = self.keys(); + var values_ptr = self.values(); while (i < self.capacity()) : (i += 1) { if (metadata[i].isUsed()) { - const entry = &entr[i]; - other.putAssumeCapacityNoClobber(entry.key, entry.value); + other.putAssumeCapacityNoClobberContext(keys_ptr[i], values_ptr[i], new_ctx); if (other.size == self.size) break; } @@ -749,7 +1345,8 @@ pub fn HashMapUnmanaged( return other; } - fn grow(self: *Self, allocator: *Allocator, new_capacity: Size) !void { + fn grow(self: *Self, allocator: *Allocator, new_capacity: Size, ctx: Context) !void { + @setCold(true); const new_cap = std.math.max(new_capacity, minimal_capacity); assert(new_cap > self.capacity()); assert(std.math.isPowerOfTwo(new_cap)); @@ -764,11 +1361,11 @@ pub fn HashMapUnmanaged( const old_capacity = self.capacity(); var i: Size = 0; var metadata = self.metadata.?; - var entr = self.entries(); + var keys_ptr = self.keys(); + var values_ptr = self.values(); while (i < old_capacity) : (i += 1) { if (metadata[i].isUsed()) { - const entry = &entr[i]; - map.putAssumeCapacityNoClobber(entry.key, entry.value); + map.putAssumeCapacityNoClobberContext(keys_ptr[i], values_ptr[i], ctx); if (map.size == self.size) break; } @@ -780,26 +1377,64 @@ pub fn HashMapUnmanaged( } fn allocate(self: *Self, allocator: *Allocator, new_capacity: Size) !void { + const header_align = @alignOf(Header); + const key_align = if (@sizeOf(K) == 0) 1 else @alignOf(K); + const val_align = if (@sizeOf(V) == 0) 1 else @alignOf(V); + const max_align = comptime math.max3(header_align, key_align, val_align); + const meta_size = @sizeOf(Header) + new_capacity * @sizeOf(Metadata); + comptime assert(@alignOf(Metadata) == 1); + + const keys_start = std.mem.alignForward(meta_size, key_align); + const keys_end = keys_start + new_capacity * @sizeOf(K); - const alignment = @alignOf(Entry) - 1; - const entries_size = @as(usize, new_capacity) * @sizeOf(Entry) + alignment; + const vals_start = std.mem.alignForward(keys_end, val_align); + const vals_end = vals_start + new_capacity * @sizeOf(V); - const total_size = meta_size + entries_size; + const total_size = std.mem.alignForward(vals_end, max_align); - const slice = try allocator.alignedAlloc(u8, @alignOf(Header), total_size); + const slice = try allocator.alignedAlloc(u8, max_align, total_size); const ptr = @ptrToInt(slice.ptr); const metadata = ptr + @sizeOf(Header); - var entry_ptr = ptr + meta_size; - entry_ptr = (entry_ptr + alignment) & ~@as(usize, alignment); - assert(entry_ptr + @as(usize, new_capacity) * @sizeOf(Entry) <= ptr + total_size); const hdr = @intToPtr(*Header, ptr); - hdr.entries = @intToPtr([*]Entry, entry_ptr); + if (@sizeOf([*]V) != 0) { + hdr.values = @intToPtr([*]V, ptr + vals_start); + } + if (@sizeOf([*]K) != 0) { + hdr.keys = @intToPtr([*]K, ptr + keys_start); + } hdr.capacity = new_capacity; self.metadata = @intToPtr([*]Metadata, metadata); } + + fn deallocate(self: *Self, allocator: *Allocator) void { + if (self.metadata == null) return; + + const header_align = @alignOf(Header); + const key_align = if (@sizeOf(K) == 0) 1 else @alignOf(K); + const val_align = if (@sizeOf(V) == 0) 1 else @alignOf(V); + const max_align = comptime math.max3(header_align, key_align, val_align); + + const cap = self.capacity(); + const meta_size = @sizeOf(Header) + cap * @sizeOf(Metadata); + comptime assert(@alignOf(Metadata) == 1); + + const keys_start = std.mem.alignForward(meta_size, key_align); + const keys_end = keys_start + cap * @sizeOf(K); + + const vals_start = std.mem.alignForward(keys_end, val_align); + const vals_end = vals_start + cap * @sizeOf(V); + + const total_size = std.mem.alignForward(vals_end, max_align); + + const slice = @intToPtr([*]align(max_align) u8, @ptrToInt(self.header()))[0..total_size]; + allocator.free(slice); + + self.metadata = null; + self.available = 0; + } }; } @@ -822,14 +1457,14 @@ test "std.hash_map basic usage" { var sum: u32 = 0; var it = map.iterator(); while (it.next()) |kv| { - sum += kv.key; + sum += kv.key_ptr.*; } - try expect(sum == total); + try expectEqual(total, sum); i = 0; sum = 0; while (i < count) : (i += 1) { - try expectEqual(map.get(i).?, i); + try expectEqual(i, map.get(i).?); sum += map.get(i).?; } try expectEqual(total, sum); @@ -903,7 +1538,7 @@ test "std.hash_map grow" { i = 0; var it = map.iterator(); while (it.next()) |kv| { - try expectEqual(kv.key, kv.value); + try expectEqual(kv.key_ptr.*, kv.value_ptr.*); i += 1; } try expectEqual(i, growTo); @@ -931,9 +1566,9 @@ test "std.hash_map clone" { defer b.deinit(); try expectEqual(b.count(), 3); - try expectEqual(b.get(1), 1); - try expectEqual(b.get(2), 2); - try expectEqual(b.get(3), 3); + try expectEqual(b.get(1).?, 1); + try expectEqual(b.get(2).?, 2); + try expectEqual(b.get(3).?, 3); } test "std.hash_map ensureCapacity with existing elements" { @@ -975,8 +1610,8 @@ test "std.hash_map remove" { try expectEqual(map.count(), 10); var it = map.iterator(); while (it.next()) |kv| { - try expectEqual(kv.key, kv.value); - try expect(kv.key % 3 != 0); + try expectEqual(kv.key_ptr.*, kv.value_ptr.*); + try expect(kv.key_ptr.* % 3 != 0); } i = 0; @@ -1146,7 +1781,7 @@ test "std.hash_map putAssumeCapacity" { i = 0; var sum = i; while (i < 20) : (i += 1) { - sum += map.get(i).?; + sum += map.getPtr(i).?.*; } try expectEqual(sum, 190); @@ -1201,33 +1836,34 @@ test "std.hash_map basic hash map usage" { const gop1 = try map.getOrPut(5); try testing.expect(gop1.found_existing == true); - try testing.expect(gop1.entry.value == 55); - gop1.entry.value = 77; - try testing.expect(map.getEntry(5).?.value == 77); + try testing.expect(gop1.value_ptr.* == 55); + gop1.value_ptr.* = 77; + try testing.expect(map.getEntry(5).?.value_ptr.* == 77); const gop2 = try map.getOrPut(99); try testing.expect(gop2.found_existing == false); - gop2.entry.value = 42; - try testing.expect(map.getEntry(99).?.value == 42); + gop2.value_ptr.* = 42; + try testing.expect(map.getEntry(99).?.value_ptr.* == 42); const gop3 = try map.getOrPutValue(5, 5); - try testing.expect(gop3.value == 77); + try testing.expect(gop3.value_ptr.* == 77); const gop4 = try map.getOrPutValue(100, 41); - try testing.expect(gop4.value == 41); + try testing.expect(gop4.value_ptr.* == 41); try testing.expect(map.contains(2)); - try testing.expect(map.getEntry(2).?.value == 22); + try testing.expect(map.getEntry(2).?.value_ptr.* == 22); try testing.expect(map.get(2).? == 22); - const rmv1 = map.remove(2); + const rmv1 = map.fetchRemove(2); try testing.expect(rmv1.?.key == 2); try testing.expect(rmv1.?.value == 22); - try testing.expect(map.remove(2) == null); + try testing.expect(map.fetchRemove(2) == null); + try testing.expect(map.remove(2) == false); try testing.expect(map.getEntry(2) == null); try testing.expect(map.get(2) == null); - map.removeAssertDiscard(3); + try testing.expect(map.remove(3) == true); } test "std.hash_map clone" { @@ -1247,3 +1883,14 @@ test "std.hash_map clone" { try testing.expect(copy.get(i).? == i * 10); } } + +test "compile everything" { + std.testing.refAllDecls(AutoHashMap(i32, i32)); + std.testing.refAllDecls(StringHashMap([]const u8)); + std.testing.refAllDecls(AutoHashMap(i32, void)); + std.testing.refAllDecls(StringHashMap(u0)); + std.testing.refAllDecls(AutoHashMapUnmanaged(i32, i32)); + std.testing.refAllDecls(StringHashMapUnmanaged([]const u8)); + std.testing.refAllDecls(AutoHashMapUnmanaged(i32, void)); + std.testing.refAllDecls(StringHashMapUnmanaged(u0)); +} diff --git a/lib/std/heap/general_purpose_allocator.zig b/lib/std/heap/general_purpose_allocator.zig index 239ec4309f..4eb8a2a37f 100644 --- a/lib/std/heap/general_purpose_allocator.zig +++ b/lib/std/heap/general_purpose_allocator.zig @@ -346,10 +346,10 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { break; } } - var it = self.large_allocations.iterator(); + var it = self.large_allocations.valueIterator(); while (it.next()) |large_alloc| { log.err("memory address 0x{x} leaked: {s}", .{ - @ptrToInt(large_alloc.value.bytes.ptr), large_alloc.value.getStackTrace(), + @ptrToInt(large_alloc.bytes.ptr), large_alloc.getStackTrace(), }); leaks = true; } @@ -444,7 +444,7 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { } }; - if (config.safety and old_mem.len != entry.value.bytes.len) { + if (config.safety and old_mem.len != entry.value_ptr.bytes.len) { var addresses: [stack_n]usize = [1]usize{0} ** stack_n; var free_stack_trace = StackTrace{ .instruction_addresses = &addresses, @@ -452,9 +452,9 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { }; std.debug.captureStackTrace(ret_addr, &free_stack_trace); log.err("Allocation size {d} bytes does not match free size {d}. Allocation: {s} Free: {s}", .{ - entry.value.bytes.len, + entry.value_ptr.bytes.len, old_mem.len, - entry.value.getStackTrace(), + entry.value_ptr.getStackTrace(), free_stack_trace, }); } @@ -466,7 +466,7 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { log.info("large free {d} bytes at {*}", .{ old_mem.len, old_mem.ptr }); } - self.large_allocations.removeAssertDiscard(@ptrToInt(old_mem.ptr)); + assert(self.large_allocations.remove(@ptrToInt(old_mem.ptr))); return 0; } @@ -475,8 +475,8 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { old_mem.len, old_mem.ptr, new_size, }); } - entry.value.bytes = old_mem.ptr[0..result_len]; - collectStackTrace(ret_addr, &entry.value.stack_addresses); + entry.value_ptr.bytes = old_mem.ptr[0..result_len]; + collectStackTrace(ret_addr, &entry.value_ptr.stack_addresses); return result_len; } @@ -645,8 +645,8 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { const gop = self.large_allocations.getOrPutAssumeCapacity(@ptrToInt(slice.ptr)); assert(!gop.found_existing); // This would mean the kernel double-mapped pages. - gop.entry.value.bytes = slice; - collectStackTrace(ret_addr, &gop.entry.value.stack_addresses); + gop.value_ptr.bytes = slice; + collectStackTrace(ret_addr, &gop.value_ptr.stack_addresses); if (config.verbose_log) { log.info("large alloc {d} bytes at {*}", .{ slice.len, slice.ptr }); diff --git a/lib/std/json.zig b/lib/std/json.zig index 9fb77d501f..c72fc91ba6 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -1303,14 +1303,14 @@ pub const Value = union(enum) { try child_whitespace.outputIndent(out_stream); } - try stringify(entry.key, options, out_stream); + try stringify(entry.key_ptr.*, options, out_stream); try out_stream.writeByte(':'); if (child_options.whitespace) |child_whitespace| { if (child_whitespace.separator) { try out_stream.writeByte(' '); } } - try stringify(entry.value, child_options, out_stream); + try stringify(entry.value_ptr.*, child_options, out_stream); } if (field_output) { if (options.whitespace) |whitespace| { diff --git a/lib/std/math.zig b/lib/std/math.zig index 4605c46970..ac28cbb4e2 100644 --- a/lib/std/math.zig +++ b/lib/std/math.zig @@ -380,12 +380,41 @@ test "math.min" { } } +/// Finds the min of three numbers +pub fn min3(x: anytype, y: anytype, z: anytype) @TypeOf(x, y, z) { + return min(x, min(y, z)); +} + +test "math.min3" { + try testing.expect(min3(@as(i32, 0), @as(i32, 1), @as(i32, 2)) == 0); + try testing.expect(min3(@as(i32, 0), @as(i32, 2), @as(i32, 1)) == 0); + try testing.expect(min3(@as(i32, 1), @as(i32, 0), @as(i32, 2)) == 0); + try testing.expect(min3(@as(i32, 1), @as(i32, 2), @as(i32, 0)) == 0); + try testing.expect(min3(@as(i32, 2), @as(i32, 0), @as(i32, 1)) == 0); + try testing.expect(min3(@as(i32, 2), @as(i32, 1), @as(i32, 0)) == 0); +} + pub fn max(x: anytype, y: anytype) @TypeOf(x, y) { return if (x > y) x else y; } test "math.max" { try testing.expect(max(@as(i32, -1), @as(i32, 2)) == 2); + try testing.expect(max(@as(i32, 2), @as(i32, -1)) == 2); +} + +/// Finds the max of three numbers +pub fn max3(x: anytype, y: anytype, z: anytype) @TypeOf(x, y, z) { + return max(x, max(y, z)); +} + +test "math.max3" { + try testing.expect(max3(@as(i32, 0), @as(i32, 1), @as(i32, 2)) == 2); + try testing.expect(max3(@as(i32, 0), @as(i32, 2), @as(i32, 1)) == 2); + try testing.expect(max3(@as(i32, 1), @as(i32, 0), @as(i32, 2)) == 2); + try testing.expect(max3(@as(i32, 1), @as(i32, 2), @as(i32, 0)) == 2); + try testing.expect(max3(@as(i32, 2), @as(i32, 0), @as(i32, 1)) == 2); + try testing.expect(max3(@as(i32, 2), @as(i32, 1), @as(i32, 0)) == 2); } pub fn clamp(val: anytype, lower: anytype, upper: anytype) @TypeOf(val, lower, upper) { @@ -581,6 +610,17 @@ pub fn Log2Int(comptime T: type) type { return std.meta.Int(.unsigned, count); } +pub fn Log2IntCeil(comptime T: type) type { + // comptime ceil log2 + comptime var count = 0; + comptime var s = @typeInfo(T).Int.bits; + inline while (s != 0) : (s >>= 1) { + count += 1; + } + + return std.meta.Int(.unsigned, count); +} + pub fn IntFittingRange(comptime from: comptime_int, comptime to: comptime_int) type { assert(from <= to); if (from == 0 and to == 0) { @@ -1046,15 +1086,18 @@ fn testCeilPowerOfTwo() !void { } pub fn log2_int(comptime T: type, x: T) Log2Int(T) { + if (@typeInfo(T) != .Int or @typeInfo(T).Int.signedness != .unsigned) + @compileError("log2_int requires an unsigned integer, found "++@typeName(T)); assert(x != 0); return @intCast(Log2Int(T), @typeInfo(T).Int.bits - 1 - @clz(T, x)); } -pub fn log2_int_ceil(comptime T: type, x: T) Log2Int(T) { +pub fn log2_int_ceil(comptime T: type, x: T) Log2IntCeil(T) { + if (@typeInfo(T) != .Int or @typeInfo(T).Int.signedness != .unsigned) + @compileError("log2_int_ceil requires an unsigned integer, found "++@typeName(T)); assert(x != 0); - const log2_val = log2_int(T, x); - if (@as(T, 1) << log2_val == x) - return log2_val; + if (x == 1) return 0; + const log2_val: Log2IntCeil(T) = log2_int(T, x - 1); return log2_val + 1; } diff --git a/lib/std/multi_array_list.zig b/lib/std/multi_array_list.zig index 35df5e8523..c96af48cb2 100644 --- a/lib/std/multi_array_list.zig +++ b/lib/std/multi_array_list.zig @@ -10,6 +10,15 @@ const mem = std.mem; const Allocator = mem.Allocator; const testing = std.testing; +/// A MultiArrayList stores a list of a struct type. +/// Instead of storing a single list of items, MultiArrayList +/// stores separate lists for each field of the struct. +/// This allows for memory savings if the struct has padding, +/// and also improves cache usage if only some fields are needed +/// for a computation. The primary API for accessing fields is +/// the `slice()` function, which computes the start pointers +/// for the array of each field. From the slice you can call +/// `.items(.<field_name>)` to obtain a slice of field values. pub fn MultiArrayList(comptime S: type) type { return struct { bytes: [*]align(@alignOf(S)) u8 = undefined, @@ -20,6 +29,10 @@ pub fn MultiArrayList(comptime S: type) type { pub const Field = meta.FieldEnum(S); + /// A MultiArrayList.Slice contains cached start pointers for each field in the list. + /// These pointers are not normally stored to reduce the size of the list in memory. + /// If you are accessing multiple fields, call slice() first to compute the pointers, + /// and then get the field arrays from the slice. pub const Slice = struct { /// This array is indexed by the field index which can be obtained /// by using @enumToInt() on the Field enum @@ -29,11 +42,12 @@ pub fn MultiArrayList(comptime S: type) type { pub fn items(self: Slice, comptime field: Field) []FieldType(field) { const F = FieldType(field); - if (self.len == 0) { + if (self.capacity == 0) { return &[_]F{}; } const byte_ptr = self.ptrs[@enumToInt(field)]; - const casted_ptr = @ptrCast([*]F, @alignCast(@alignOf(F), byte_ptr)); + const casted_ptr: [*]F = if (@sizeOf([*]F) == 0) undefined + else @ptrCast([*]F, @alignCast(@alignOf(F), byte_ptr)); return casted_ptr[0..self.len]; } @@ -74,12 +88,12 @@ pub fn MultiArrayList(comptime S: type) type { data[i] = .{ .size = @sizeOf(field_info.field_type), .size_index = i, - .alignment = field_info.alignment, + .alignment = if (@sizeOf(field_info.field_type) == 0) 1 else field_info.alignment, }; } const Sort = struct { fn lessThan(trash: *i32, lhs: Data, rhs: Data) bool { - return lhs.alignment >= rhs.alignment; + return lhs.alignment > rhs.alignment; } }; var trash: i32 = undefined; // workaround for stage1 compiler bug @@ -109,6 +123,9 @@ pub fn MultiArrayList(comptime S: type) type { return result; } + /// Compute pointers to the start of each field of the array. + /// If you need to access multiple fields, calling this may + /// be more efficient than calling `items()` multiple times. pub fn slice(self: Self) Slice { var result: Slice = .{ .ptrs = undefined, @@ -123,6 +140,9 @@ pub fn MultiArrayList(comptime S: type) type { return result; } + /// Get the slice of values for a specified field. + /// If you need multiple fields, consider calling slice() + /// instead. pub fn items(self: Self, comptime field: Field) []FieldType(field) { return self.slice().items(field); } @@ -159,6 +179,72 @@ pub fn MultiArrayList(comptime S: type) type { self.set(self.len - 1, elem); } + /// Extend the list by 1 element, asserting `self.capacity` + /// is sufficient to hold an additional item. Returns the + /// newly reserved index with uninitialized data. + pub fn addOneAssumeCapacity(self: *Self) usize { + assert(self.len < self.capacity); + const index = self.len; + self.len += 1; + return index; + } + + /// Inserts an item into an ordered list. Shifts all elements + /// after and including the specified index back by one and + /// sets the given index to the specified element. May reallocate + /// and invalidate iterators. + pub fn insert(self: *Self, gpa: *Allocator, index: usize, elem: S) void { + try self.ensureCapacity(gpa, self.len + 1); + self.insertAssumeCapacity(index, elem); + } + + /// Inserts an item into an ordered list which has room for it. + /// Shifts all elements after and including the specified index + /// back by one and sets the given index to the specified element. + /// Will not reallocate the array, does not invalidate iterators. + pub fn insertAssumeCapacity(self: *Self, index: usize, elem: S) void { + assert(self.len < self.capacity); + assert(index <= self.len); + self.len += 1; + const slices = self.slice(); + inline for (fields) |field_info, field_index| { + const field_slice = slices.items(@intToEnum(Field, field_index)); + var i: usize = self.len-1; + while (i > index) : (i -= 1) { + field_slice[i] = field_slice[i-1]; + } + field_slice[index] = @field(elem, field_info.name); + } + } + + /// Remove the specified item from the list, swapping the last + /// item in the list into its position. Fast, but does not + /// retain list ordering. + pub fn swapRemove(self: *Self, index: usize) void { + const slices = self.slice(); + inline for (fields) |field_info, i| { + const field_slice = slices.items(@intToEnum(Field, i)); + field_slice[index] = field_slice[self.len-1]; + field_slice[self.len-1] = undefined; + } + self.len -= 1; + } + + /// Remove the specified item from the list, shifting items + /// after it to preserve order. + pub fn orderedRemove(self: *Self, index: usize) void { + const slices = self.slice(); + inline for (fields) |field_info, field_index| { + const field_slice = slices.items(@intToEnum(Field, field_index)); + var i = index; + while (i < self.len-1) : (i += 1) { + field_slice[i] = field_slice[i+1]; + } + field_slice[i] = undefined; + } + self.len -= 1; + } + /// Adjust the list's length to `new_len`. /// Does not initialize added items, if any. pub fn resize(self: *Self, gpa: *Allocator, new_len: usize) !void { @@ -186,13 +272,15 @@ pub fn MultiArrayList(comptime S: type) type { ) catch { const self_slice = self.slice(); inline for (fields) |field_info, i| { - const field = @intToEnum(Field, i); - const dest_slice = self_slice.items(field)[new_len..]; - const byte_count = dest_slice.len * @sizeOf(field_info.field_type); - // We use memset here for more efficient codegen in safety-checked, - // valgrind-enabled builds. Otherwise the valgrind client request - // will be repeated for every element. - @memset(@ptrCast([*]u8, dest_slice.ptr), undefined, byte_count); + if (@sizeOf(field_info.field_type) != 0) { + const field = @intToEnum(Field, i); + const dest_slice = self_slice.items(field)[new_len..]; + const byte_count = dest_slice.len * @sizeOf(field_info.field_type); + // We use memset here for more efficient codegen in safety-checked, + // valgrind-enabled builds. Otherwise the valgrind client request + // will be repeated for every element. + @memset(@ptrCast([*]u8, dest_slice.ptr), undefined, byte_count); + } } self.len = new_len; return; @@ -206,12 +294,14 @@ pub fn MultiArrayList(comptime S: type) type { const self_slice = self.slice(); const other_slice = other.slice(); inline for (fields) |field_info, i| { - const field = @intToEnum(Field, i); - // TODO we should be able to use std.mem.copy here but it causes a - // test failure on aarch64 with -OReleaseFast - const src_slice = mem.sliceAsBytes(self_slice.items(field)); - const dst_slice = mem.sliceAsBytes(other_slice.items(field)); - @memcpy(dst_slice.ptr, src_slice.ptr, src_slice.len); + if (@sizeOf(field_info.field_type) != 0) { + const field = @intToEnum(Field, i); + // TODO we should be able to use std.mem.copy here but it causes a + // test failure on aarch64 with -OReleaseFast + const src_slice = mem.sliceAsBytes(self_slice.items(field)); + const dst_slice = mem.sliceAsBytes(other_slice.items(field)); + @memcpy(dst_slice.ptr, src_slice.ptr, src_slice.len); + } } gpa.free(self.allocatedBytes()); self.* = other; @@ -273,17 +363,41 @@ pub fn MultiArrayList(comptime S: type) type { const self_slice = self.slice(); const other_slice = other.slice(); inline for (fields) |field_info, i| { - const field = @intToEnum(Field, i); - // TODO we should be able to use std.mem.copy here but it causes a - // test failure on aarch64 with -OReleaseFast - const src_slice = mem.sliceAsBytes(self_slice.items(field)); - const dst_slice = mem.sliceAsBytes(other_slice.items(field)); - @memcpy(dst_slice.ptr, src_slice.ptr, src_slice.len); + if (@sizeOf(field_info.field_type) != 0) { + const field = @intToEnum(Field, i); + // TODO we should be able to use std.mem.copy here but it causes a + // test failure on aarch64 with -OReleaseFast + const src_slice = mem.sliceAsBytes(self_slice.items(field)); + const dst_slice = mem.sliceAsBytes(other_slice.items(field)); + @memcpy(dst_slice.ptr, src_slice.ptr, src_slice.len); + } } gpa.free(self.allocatedBytes()); self.* = other; } + /// Create a copy of this list with a new backing store, + /// using the specified allocator. + pub fn clone(self: Self, gpa: *Allocator) !Self { + var result = Self{}; + errdefer result.deinit(gpa); + try result.ensureCapacity(gpa, self.len); + result.len = self.len; + const self_slice = self.slice(); + const result_slice = result.slice(); + inline for (fields) |field_info, i| { + if (@sizeOf(field_info.field_type) != 0) { + const field = @intToEnum(Field, i); + // TODO we should be able to use std.mem.copy here but it causes a + // test failure on aarch64 with -OReleaseFast + const src_slice = mem.sliceAsBytes(self_slice.items(field)); + const dst_slice = mem.sliceAsBytes(result_slice.items(field)); + @memcpy(dst_slice.ptr, src_slice.ptr, src_slice.len); + } + } + return result; + } + fn capacityInBytes(capacity: usize) usize { const sizes_vector: std.meta.Vector(sizes.bytes.len, usize) = sizes.bytes; const capacity_vector = @splat(sizes.bytes.len, capacity); diff --git a/lib/std/process.zig b/lib/std/process.zig index f026c640e0..d5a2045699 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -85,7 +85,7 @@ pub fn getEnvMap(allocator: *Allocator) !BufMap { i += 1; // skip over null byte - try result.setMove(key, value); + try result.putMove(key, value); } return result; } else if (builtin.os.tag == .wasi) { @@ -112,7 +112,7 @@ pub fn getEnvMap(allocator: *Allocator) !BufMap { var parts = mem.split(pair, "="); const key = parts.next().?; const value = parts.next().?; - try result.set(key, value); + try result.put(key, value); } return result; } else if (builtin.link_libc) { @@ -126,7 +126,7 @@ pub fn getEnvMap(allocator: *Allocator) !BufMap { while (line[end_i] != 0) : (end_i += 1) {} const value = line[line_i + 1 .. end_i]; - try result.set(key, value); + try result.put(key, value); } return result; } else { @@ -139,7 +139,7 @@ pub fn getEnvMap(allocator: *Allocator) !BufMap { while (line[end_i] != 0) : (end_i += 1) {} const value = line[line_i + 1 .. end_i]; - try result.set(key, value); + try result.put(key, value); } return result; } |
