| field | value |
|---|---|
| author | Sahnvour <sahnvour@pm.me>, 2020-08-02 23:24:03 +0200 |
| committer | Sahnvour <sahnvour@pm.me>, 2020-09-02 00:17:50 +0200 |
| commit | 575fbd5e3592cff70cbfc5153884d919e6bed89f (patch) |
| tree | 91b86f38fc18a2eae631fe4cdf50502d8c71ab8f /lib/std/array_hash_map.zig |
| parent | 3f7cb14b267bbd823597ba24fd2e3f6f66abcbaa (diff) |
| download | zig-575fbd5e3592cff70cbfc5153884d919e6bed89f.tar.gz, zig-575fbd5e3592cff70cbfc5153884d919e6bed89f.zip |
hash_map: rename to ArrayHashMap and add new HashMap implementation
Diffstat (limited to 'lib/std/array_hash_map.zig')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | lib/std/array_hash_map.zig | 1087 |

1 file changed, 1087 insertions, 0 deletions
diff --git a/lib/std/array_hash_map.zig b/lib/std/array_hash_map.zig new file mode 100644 index 0000000000..f8c3623ef2 --- /dev/null +++ b/lib/std/array_hash_map.zig @@ -0,0 +1,1087 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. +const std = @import("std.zig"); +const debug = std.debug; +const assert = debug.assert; +const testing = std.testing; +const math = std.math; +const mem = std.mem; +const meta = std.meta; +const trait = meta.trait; +const autoHash = std.hash.autoHash; +const Wyhash = std.hash.Wyhash; +const Allocator = mem.Allocator; +const builtin = @import("builtin"); +const hash_map = @This(); + +pub fn AutoArrayHashMap(comptime K: type, comptime V: type) type { + return ArrayHashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K)); +} + +pub fn AutoArrayHashMapUnmanaged(comptime K: type, comptime V: type) type { + return ArrayHashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K)); +} + +/// Builtin hashmap for strings as keys. +pub fn StringArrayHashMap(comptime V: type) type { + return ArrayHashMap([]const u8, V, hashString, eqlString, true); +} + +pub fn StringArrayHashMapUnmanaged(comptime V: type) type { + return ArrayHashMapUnmanaged([]const u8, V, hashString, eqlString, true); +} + +pub fn eqlString(a: []const u8, b: []const u8) bool { + return mem.eql(u8, a, b); +} + +pub fn hashString(s: []const u8) u32 { + return @truncate(u32, std.hash.Wyhash.hash(0, s)); +} + +/// Insertion order is preserved. +/// Deletions perform a "swap removal" on the entries list. +/// Modifying the hash map while iterating is allowed, however one must understand +/// the (well defined) behavior when mixing insertions and deletions with iteration. +/// For a hash map that can be initialized directly that does not store an Allocator +/// field, see `ArrayHashMapUnmanaged`. +/// When `store_hash` is `false`, this data structure is biased towards cheap `eql` +/// functions. It does not store each item's hash in the table. Setting `store_hash` +/// to `true` incurs slightly more memory cost by storing each key's hash in the table +/// but only has to call `eql` for hash collisions. +/// If typical operations (except iteration over entries) need to be faster, prefer +/// the alternative `std.HashMap`. +pub fn ArrayHashMap( + comptime K: type, + comptime V: type, + comptime hash: fn (key: K) u32, + comptime eql: fn (a: K, b: K) bool, + comptime store_hash: bool, +) type { + return struct { + unmanaged: Unmanaged, + allocator: *Allocator, + + pub const Unmanaged = ArrayHashMapUnmanaged(K, V, hash, eql, store_hash); + pub const Entry = Unmanaged.Entry; + pub const Hash = Unmanaged.Hash; + pub const GetOrPutResult = Unmanaged.GetOrPutResult; + + /// Deprecated. Iterate using `items`. + pub const Iterator = struct { + hm: *const Self, + /// Iterator through the entry array. 
+ index: usize, + + pub fn next(it: *Iterator) ?*Entry { + if (it.index >= it.hm.unmanaged.entries.items.len) return null; + const result = &it.hm.unmanaged.entries.items[it.index]; + it.index += 1; + return result; + } + + /// Reset the iterator to the initial index + pub fn reset(it: *Iterator) void { + it.index = 0; + } + }; + + const Self = @This(); + const Index = Unmanaged.Index; + + pub fn init(allocator: *Allocator) Self { + return .{ + .unmanaged = .{}, + .allocator = allocator, + }; + } + + pub fn deinit(self: *Self) void { + self.unmanaged.deinit(self.allocator); + self.* = undefined; + } + + pub fn clearRetainingCapacity(self: *Self) void { + return self.unmanaged.clearRetainingCapacity(); + } + + pub fn clearAndFree(self: *Self) void { + return self.unmanaged.clearAndFree(self.allocator); + } + + /// Deprecated. Use `items().len`. + pub fn count(self: Self) usize { + return self.items().len; + } + + /// Deprecated. Iterate using `items`. + pub fn iterator(self: *const Self) Iterator { + return Iterator{ + .hm = self, + .index = 0, + }; + } + + /// If key exists this function cannot fail. + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + pub fn getOrPut(self: *Self, key: K) !GetOrPutResult { + return self.unmanaged.getOrPut(self.allocator, key); + } + + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + /// If a new entry needs to be stored, this function asserts there + /// is enough capacity to store it. + pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { + return self.unmanaged.getOrPutAssumeCapacity(key); + } + + pub fn getOrPutValue(self: *Self, key: K, value: V) !*Entry { + return self.unmanaged.getOrPutValue(self.allocator, key, value); + } + + /// Increases capacity, guaranteeing that insertions up until the + /// `expected_count` will not cause an allocation, and therefore cannot fail. + pub fn ensureCapacity(self: *Self, new_capacity: usize) !void { + return self.unmanaged.ensureCapacity(self.allocator, new_capacity); + } + + /// Returns the number of total elements which may be present before it is + /// no longer guaranteed that no allocations will be performed. + pub fn capacity(self: *Self) usize { + return self.unmanaged.capacity(); + } + + /// Clobbers any existing data. To detect if a put would clobber + /// existing data, see `getOrPut`. + pub fn put(self: *Self, key: K, value: V) !void { + return self.unmanaged.put(self.allocator, key, value); + } + + /// Inserts a key-value pair into the hash map, asserting that no previous + /// entry with the same key is already present + pub fn putNoClobber(self: *Self, key: K, value: V) !void { + return self.unmanaged.putNoClobber(self.allocator, key, value); + } + + /// Asserts there is enough capacity to store the new key-value pair. + /// Clobbers any existing data. To detect if a put would clobber + /// existing data, see `getOrPutAssumeCapacity`. 
+ pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { + return self.unmanaged.putAssumeCapacity(key, value); + } + + /// Asserts there is enough capacity to store the new key-value pair. + /// Asserts that it does not clobber any existing data. + /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`. + pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { + return self.unmanaged.putAssumeCapacityNoClobber(key, value); + } + + /// Inserts a new `Entry` into the hash map, returning the previous one, if any. + pub fn fetchPut(self: *Self, key: K, value: V) !?Entry { + return self.unmanaged.fetchPut(self.allocator, key, value); + } + + /// Inserts a new `Entry` into the hash map, returning the previous one, if any. + /// If insertion happuns, asserts there is enough capacity without allocating. + pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { + return self.unmanaged.fetchPutAssumeCapacity(key, value); + } + + pub fn getEntry(self: Self, key: K) ?*Entry { + return self.unmanaged.getEntry(key); + } + + pub fn getIndex(self: Self, key: K) ?usize { + return self.unmanaged.getIndex(key); + } + + pub fn get(self: Self, key: K) ?V { + return self.unmanaged.get(key); + } + + pub fn contains(self: Self, key: K) bool { + return self.unmanaged.contains(key); + } + + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map, and then returned from this function. + pub fn remove(self: *Self, key: K) ?Entry { + return self.unmanaged.remove(key); + } + + /// Asserts there is an `Entry` with matching key, deletes it from the hash map, + /// and discards it. + pub fn removeAssertDiscard(self: *Self, key: K) void { + return self.unmanaged.removeAssertDiscard(key); + } + + pub fn items(self: Self) []Entry { + return self.unmanaged.items(); + } + + pub fn clone(self: Self) !Self { + var other = try self.unmanaged.clone(self.allocator); + return other.promote(self.allocator); + } + }; +} + +/// General purpose hash table. +/// Insertion order is preserved. +/// Deletions perform a "swap removal" on the entries list. +/// Modifying the hash map while iterating is allowed, however one must understand +/// the (well defined) behavior when mixing insertions and deletions with iteration. +/// This type does not store an Allocator field - the Allocator must be passed in +/// with each function call that requires it. See `ArrayHashMap` for a type that stores +/// an Allocator field for convenience. +/// Can be initialized directly using the default field values. +/// This type is designed to have low overhead for small numbers of entries. When +/// `store_hash` is `false` and the number of entries in the map is less than 9, +/// the overhead cost of using `ArrayHashMapUnmanaged` rather than `std.ArrayList` is +/// only a single pointer-sized integer. +/// When `store_hash` is `false`, this data structure is biased towards cheap `eql` +/// functions. It does not store each item's hash in the table. Setting `store_hash` +/// to `true` incurs slightly more memory cost by storing each key's hash in the table +/// but guarantees only one call to `eql` per insertion/deletion. +pub fn ArrayHashMapUnmanaged( + comptime K: type, + comptime V: type, + comptime hash: fn (key: K) u32, + comptime eql: fn (a: K, b: K) bool, + comptime store_hash: bool, +) type { + return struct { + /// It is permitted to access this field directly. 
+ entries: std.ArrayListUnmanaged(Entry) = .{}, + + /// When entries length is less than `linear_scan_max`, this remains `null`. + /// Once entries length grows big enough, this field is allocated. There is + /// an IndexHeader followed by an array of Index(I) structs, where I is defined + /// by how many total indexes there are. + index_header: ?*IndexHeader = null, + + /// Modifying the key is illegal behavior. + /// Modifying the value is allowed. + /// Entry pointers become invalid whenever this ArrayHashMap is modified, + /// unless `ensureCapacity` was previously used. + pub const Entry = struct { + /// This field is `void` if `store_hash` is `false`. + hash: Hash, + key: K, + value: V, + }; + + pub const Hash = if (store_hash) u32 else void; + + pub const GetOrPutResult = struct { + entry: *Entry, + found_existing: bool, + }; + + pub const Managed = ArrayHashMap(K, V, hash, eql, store_hash); + + const Self = @This(); + + const linear_scan_max = 8; + + pub fn promote(self: Self, allocator: *Allocator) Managed { + return .{ + .unmanaged = self, + .allocator = allocator, + }; + } + + pub fn deinit(self: *Self, allocator: *Allocator) void { + self.entries.deinit(allocator); + if (self.index_header) |header| { + header.free(allocator); + } + self.* = undefined; + } + + pub fn clearRetainingCapacity(self: *Self) void { + self.entries.items.len = 0; + if (self.index_header) |header| { + header.max_distance_from_start_index = 0; + switch (header.capacityIndexType()) { + .u8 => mem.set(Index(u8), header.indexes(u8), Index(u8).empty), + .u16 => mem.set(Index(u16), header.indexes(u16), Index(u16).empty), + .u32 => mem.set(Index(u32), header.indexes(u32), Index(u32).empty), + .usize => mem.set(Index(usize), header.indexes(usize), Index(usize).empty), + } + } + } + + pub fn clearAndFree(self: *Self, allocator: *Allocator) void { + self.entries.shrink(allocator, 0); + if (self.index_header) |header| { + header.free(allocator); + self.index_header = null; + } + } + + /// If key exists this function cannot fail. + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult { + self.ensureCapacity(allocator, self.entries.items.len + 1) catch |err| { + // "If key exists this function cannot fail." + return GetOrPutResult{ + .entry = self.getEntry(key) orelse return err, + .found_existing = true, + }; + }; + return self.getOrPutAssumeCapacity(key); + } + + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + /// If a new entry needs to be stored, this function asserts there + /// is enough capacity to store it. + pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { + const header = self.index_header orelse { + // Linear scan. 
+ const h = if (store_hash) hash(key) else {}; + for (self.entries.items) |*item| { + if (item.hash == h and eql(key, item.key)) { + return GetOrPutResult{ + .entry = item, + .found_existing = true, + }; + } + } + const new_entry = self.entries.addOneAssumeCapacity(); + new_entry.* = .{ + .hash = if (store_hash) h else {}, + .key = key, + .value = undefined, + }; + return GetOrPutResult{ + .entry = new_entry, + .found_existing = false, + }; + }; + + switch (header.capacityIndexType()) { + .u8 => return self.getOrPutInternal(key, header, u8), + .u16 => return self.getOrPutInternal(key, header, u16), + .u32 => return self.getOrPutInternal(key, header, u32), + .usize => return self.getOrPutInternal(key, header, usize), + } + } + + pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry { + const res = try self.getOrPut(allocator, key); + if (!res.found_existing) + res.entry.value = value; + + return res.entry; + } + + /// Increases capacity, guaranteeing that insertions up until the + /// `expected_count` will not cause an allocation, and therefore cannot fail. + pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void { + try self.entries.ensureCapacity(allocator, new_capacity); + if (new_capacity <= linear_scan_max) return; + + // Ensure that the indexes will be at most 60% full if + // `new_capacity` items are put into it. + const needed_len = new_capacity * 5 / 3; + if (self.index_header) |header| { + if (needed_len > header.indexes_len) { + // An overflow here would mean the amount of memory required would not + // be representable in the address space. + const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; + const new_header = try IndexHeader.alloc(allocator, new_indexes_len); + self.insertAllEntriesIntoNewHeader(new_header); + header.free(allocator); + self.index_header = new_header; + } + } else { + // An overflow here would mean the amount of memory required would not + // be representable in the address space. + const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; + const header = try IndexHeader.alloc(allocator, new_indexes_len); + self.insertAllEntriesIntoNewHeader(header); + self.index_header = header; + } + } + + /// Returns the number of total elements which may be present before it is + /// no longer guaranteed that no allocations will be performed. + pub fn capacity(self: Self) usize { + const entry_cap = self.entries.capacity; + const header = self.index_header orelse return math.min(linear_scan_max, entry_cap); + const indexes_cap = (header.indexes_len + 1) * 3 / 4; + return math.min(entry_cap, indexes_cap); + } + + /// Clobbers any existing data. To detect if a put would clobber + /// existing data, see `getOrPut`. + pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void { + const result = try self.getOrPut(allocator, key); + result.entry.value = value; + } + + /// Inserts a key-value pair into the hash map, asserting that no previous + /// entry with the same key is already present + pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void { + const result = try self.getOrPut(allocator, key); + assert(!result.found_existing); + result.entry.value = value; + } + + /// Asserts there is enough capacity to store the new key-value pair. + /// Clobbers any existing data. To detect if a put would clobber + /// existing data, see `getOrPutAssumeCapacity`. 
+ pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { + const result = self.getOrPutAssumeCapacity(key); + result.entry.value = value; + } + + /// Asserts there is enough capacity to store the new key-value pair. + /// Asserts that it does not clobber any existing data. + /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`. + pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { + const result = self.getOrPutAssumeCapacity(key); + assert(!result.found_existing); + result.entry.value = value; + } + + /// Inserts a new `Entry` into the hash map, returning the previous one, if any. + pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?Entry { + const gop = try self.getOrPut(allocator, key); + var result: ?Entry = null; + if (gop.found_existing) { + result = gop.entry.*; + } + gop.entry.value = value; + return result; + } + + /// Inserts a new `Entry` into the hash map, returning the previous one, if any. + /// If insertion happens, asserts there is enough capacity without allocating. + pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { + const gop = self.getOrPutAssumeCapacity(key); + var result: ?Entry = null; + if (gop.found_existing) { + result = gop.entry.*; + } + gop.entry.value = value; + return result; + } + + pub fn getEntry(self: Self, key: K) ?*Entry { + const index = self.getIndex(key) orelse return null; + return &self.entries.items[index]; + } + + pub fn getIndex(self: Self, key: K) ?usize { + const header = self.index_header orelse { + // Linear scan. + const h = if (store_hash) hash(key) else {}; + for (self.entries.items) |*item, i| { + if (item.hash == h and eql(key, item.key)) { + return i; + } + } + return null; + }; + switch (header.capacityIndexType()) { + .u8 => return self.getInternal(key, header, u8), + .u16 => return self.getInternal(key, header, u16), + .u32 => return self.getInternal(key, header, u32), + .usize => return self.getInternal(key, header, usize), + } + } + + pub fn get(self: Self, key: K) ?V { + return if (self.getEntry(key)) |entry| entry.value else null; + } + + pub fn contains(self: Self, key: K) bool { + return self.getEntry(key) != null; + } + + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map, and then returned from this function. + pub fn remove(self: *Self, key: K) ?Entry { + const header = self.index_header orelse { + // Linear scan. + const h = if (store_hash) hash(key) else {}; + for (self.entries.items) |item, i| { + if (item.hash == h and eql(key, item.key)) { + return self.entries.swapRemove(i); + } + } + return null; + }; + switch (header.capacityIndexType()) { + .u8 => return self.removeInternal(key, header, u8), + .u16 => return self.removeInternal(key, header, u16), + .u32 => return self.removeInternal(key, header, u32), + .usize => return self.removeInternal(key, header, usize), + } + } + + /// Asserts there is an `Entry` with matching key, deletes it from the hash map, + /// and discards it. 
+ pub fn removeAssertDiscard(self: *Self, key: K) void { + assert(self.remove(key) != null); + } + + pub fn items(self: Self) []Entry { + return self.entries.items; + } + + pub fn clone(self: Self, allocator: *Allocator) !Self { + var other: Self = .{}; + try other.entries.appendSlice(allocator, self.entries.items); + + if (self.index_header) |header| { + const new_header = try IndexHeader.alloc(allocator, header.indexes_len); + other.insertAllEntriesIntoNewHeader(new_header); + other.index_header = new_header; + } + return other; + } + + fn removeInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) ?Entry { + const indexes = header.indexes(I); + const h = hash(key); + const start_index = header.constrainIndex(h); + var roll_over: usize = 0; + while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { + const index_index = header.constrainIndex(start_index + roll_over); + var index = &indexes[index_index]; + if (index.isEmpty()) + return null; + + const entry = &self.entries.items[index.entry_index]; + + const hash_match = if (store_hash) h == entry.hash else true; + if (!hash_match or !eql(key, entry.key)) + continue; + + const removed_entry = self.entries.swapRemove(index.entry_index); + if (self.entries.items.len > 0 and self.entries.items.len != index.entry_index) { + // Because of the swap remove, now we need to update the index that was + // pointing to the last entry and is now pointing to this removed item slot. + self.updateEntryIndex(header, self.entries.items.len, index.entry_index, I, indexes); + } + + // Now we have to shift over the following indexes. + roll_over += 1; + while (roll_over < header.indexes_len) : (roll_over += 1) { + const next_index_index = header.constrainIndex(start_index + roll_over); + const next_index = &indexes[next_index_index]; + if (next_index.isEmpty() or next_index.distance_from_start_index == 0) { + index.setEmpty(); + return removed_entry; + } + index.* = next_index.*; + index.distance_from_start_index -= 1; + index = next_index; + } + unreachable; + } + return null; + } + + fn updateEntryIndex( + self: *Self, + header: *IndexHeader, + old_entry_index: usize, + new_entry_index: usize, + comptime I: type, + indexes: []Index(I), + ) void { + const h = if (store_hash) self.entries.items[new_entry_index].hash else hash(self.entries.items[new_entry_index].key); + const start_index = header.constrainIndex(h); + var roll_over: usize = 0; + while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { + const index_index = header.constrainIndex(start_index + roll_over); + const index = &indexes[index_index]; + if (index.entry_index == old_entry_index) { + index.entry_index = @intCast(I, new_entry_index); + return; + } + } + unreachable; + } + + /// Must ensureCapacity before calling this. 
+ fn getOrPutInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) GetOrPutResult { + const indexes = header.indexes(I); + const h = hash(key); + const start_index = header.constrainIndex(h); + var roll_over: usize = 0; + var distance_from_start_index: usize = 0; + while (roll_over <= header.indexes_len) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const index_index = header.constrainIndex(start_index + roll_over); + const index = indexes[index_index]; + if (index.isEmpty()) { + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, self.entries.items.len), + }; + header.maybeBumpMax(distance_from_start_index); + const new_entry = self.entries.addOneAssumeCapacity(); + new_entry.* = .{ + .hash = if (store_hash) h else {}, + .key = key, + .value = undefined, + }; + return .{ + .found_existing = false, + .entry = new_entry, + }; + } + + // This pointer survives the following append because we call + // entries.ensureCapacity before getOrPutInternal. + const entry = &self.entries.items[index.entry_index]; + const hash_match = if (store_hash) h == entry.hash else true; + if (hash_match and eql(key, entry.key)) { + return .{ + .found_existing = true, + .entry = entry, + }; + } + if (index.distance_from_start_index < distance_from_start_index) { + // In this case, we did not find the item. We will put a new entry. + // However, we will use this index for the new entry, and move + // the previous index down the line, to keep the max_distance_from_start_index + // as small as possible. + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, self.entries.items.len), + }; + header.maybeBumpMax(distance_from_start_index); + const new_entry = self.entries.addOneAssumeCapacity(); + new_entry.* = .{ + .hash = if (store_hash) h else {}, + .key = key, + .value = undefined, + }; + + distance_from_start_index = index.distance_from_start_index; + var prev_entry_index = index.entry_index; + + // Find somewhere to put the index we replaced by shifting + // following indexes backwards. 
+ roll_over += 1; + distance_from_start_index += 1; + while (roll_over < header.indexes_len) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const next_index_index = header.constrainIndex(start_index + roll_over); + const next_index = indexes[next_index_index]; + if (next_index.isEmpty()) { + header.maybeBumpMax(distance_from_start_index); + indexes[next_index_index] = .{ + .entry_index = prev_entry_index, + .distance_from_start_index = @intCast(I, distance_from_start_index), + }; + return .{ + .found_existing = false, + .entry = new_entry, + }; + } + if (next_index.distance_from_start_index < distance_from_start_index) { + header.maybeBumpMax(distance_from_start_index); + indexes[next_index_index] = .{ + .entry_index = prev_entry_index, + .distance_from_start_index = @intCast(I, distance_from_start_index), + }; + distance_from_start_index = next_index.distance_from_start_index; + prev_entry_index = next_index.entry_index; + } + } + unreachable; + } + } + unreachable; + } + + fn getInternal(self: Self, key: K, header: *IndexHeader, comptime I: type) ?usize { + const indexes = header.indexes(I); + const h = hash(key); + const start_index = header.constrainIndex(h); + var roll_over: usize = 0; + while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { + const index_index = header.constrainIndex(start_index + roll_over); + const index = indexes[index_index]; + if (index.isEmpty()) + return null; + + const entry = &self.entries.items[index.entry_index]; + const hash_match = if (store_hash) h == entry.hash else true; + if (hash_match and eql(key, entry.key)) + return index.entry_index; + } + return null; + } + + fn insertAllEntriesIntoNewHeader(self: *Self, header: *IndexHeader) void { + switch (header.capacityIndexType()) { + .u8 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u8), + .u16 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u16), + .u32 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u32), + .usize => return self.insertAllEntriesIntoNewHeaderGeneric(header, usize), + } + } + + fn insertAllEntriesIntoNewHeaderGeneric(self: *Self, header: *IndexHeader, comptime I: type) void { + const indexes = header.indexes(I); + entry_loop: for (self.entries.items) |entry, i| { + const h = if (store_hash) entry.hash else hash(entry.key); + const start_index = header.constrainIndex(h); + var entry_index = i; + var roll_over: usize = 0; + var distance_from_start_index: usize = 0; + while (roll_over < header.indexes_len) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const index_index = header.constrainIndex(start_index + roll_over); + const next_index = indexes[index_index]; + if (next_index.isEmpty()) { + header.maybeBumpMax(distance_from_start_index); + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, entry_index), + }; + continue :entry_loop; + } + if (next_index.distance_from_start_index < distance_from_start_index) { + header.maybeBumpMax(distance_from_start_index); + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, entry_index), + }; + distance_from_start_index = next_index.distance_from_start_index; + entry_index = next_index.entry_index; + } + } + unreachable; + } + } + }; +} + +const CapacityIndexType = enum { u8, u16, u32, usize }; + +fn capacityIndexType(indexes_len: usize) CapacityIndexType { + if (indexes_len < math.maxInt(u8)) + return .u8; 
+ if (indexes_len < math.maxInt(u16)) + return .u16; + if (indexes_len < math.maxInt(u32)) + return .u32; + return .usize; +} + +fn capacityIndexSize(indexes_len: usize) usize { + switch (capacityIndexType(indexes_len)) { + .u8 => return @sizeOf(Index(u8)), + .u16 => return @sizeOf(Index(u16)), + .u32 => return @sizeOf(Index(u32)), + .usize => return @sizeOf(Index(usize)), + } +} + +fn Index(comptime I: type) type { + return extern struct { + entry_index: I, + distance_from_start_index: I, + + const Self = @This(); + + const empty = Self{ + .entry_index = math.maxInt(I), + .distance_from_start_index = undefined, + }; + + fn isEmpty(idx: Self) bool { + return idx.entry_index == math.maxInt(I); + } + + fn setEmpty(idx: *Self) void { + idx.entry_index = math.maxInt(I); + } + }; +} + +/// This struct is trailed by an array of `Index(I)`, where `I` +/// and the array length are determined by `indexes_len`. +const IndexHeader = struct { + max_distance_from_start_index: usize, + indexes_len: usize, + + fn constrainIndex(header: IndexHeader, i: usize) usize { + // This is an optimization for modulo of power of two integers; + // it requires `indexes_len` to always be a power of two. + return i & (header.indexes_len - 1); + } + + fn indexes(header: *IndexHeader, comptime I: type) []Index(I) { + const start = @ptrCast([*]Index(I), @ptrCast([*]u8, header) + @sizeOf(IndexHeader)); + return start[0..header.indexes_len]; + } + + fn capacityIndexType(header: IndexHeader) CapacityIndexType { + return hash_map.capacityIndexType(header.indexes_len); + } + + fn maybeBumpMax(header: *IndexHeader, distance_from_start_index: usize) void { + if (distance_from_start_index > header.max_distance_from_start_index) { + header.max_distance_from_start_index = distance_from_start_index; + } + } + + fn alloc(allocator: *Allocator, len: usize) !*IndexHeader { + const index_size = hash_map.capacityIndexSize(len); + const nbytes = @sizeOf(IndexHeader) + index_size * len; + const bytes = try allocator.allocAdvanced(u8, @alignOf(IndexHeader), nbytes, .exact); + @memset(bytes.ptr + @sizeOf(IndexHeader), 0xff, bytes.len - @sizeOf(IndexHeader)); + const result = @ptrCast(*IndexHeader, bytes.ptr); + result.* = .{ + .max_distance_from_start_index = 0, + .indexes_len = len, + }; + return result; + } + + fn free(header: *IndexHeader, allocator: *Allocator) void { + const index_size = hash_map.capacityIndexSize(header.indexes_len); + const ptr = @ptrCast([*]u8, header); + const slice = ptr[0 .. 
@sizeOf(IndexHeader) + header.indexes_len * index_size]; + allocator.free(slice); + } +}; + +test "basic hash map usage" { + var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + defer map.deinit(); + + testing.expect((try map.fetchPut(1, 11)) == null); + testing.expect((try map.fetchPut(2, 22)) == null); + testing.expect((try map.fetchPut(3, 33)) == null); + testing.expect((try map.fetchPut(4, 44)) == null); + + try map.putNoClobber(5, 55); + testing.expect((try map.fetchPut(5, 66)).?.value == 55); + testing.expect((try map.fetchPut(5, 55)).?.value == 66); + + const gop1 = try map.getOrPut(5); + testing.expect(gop1.found_existing == true); + testing.expect(gop1.entry.value == 55); + gop1.entry.value = 77; + testing.expect(map.getEntry(5).?.value == 77); + + const gop2 = try map.getOrPut(99); + testing.expect(gop2.found_existing == false); + gop2.entry.value = 42; + testing.expect(map.getEntry(99).?.value == 42); + + const gop3 = try map.getOrPutValue(5, 5); + testing.expect(gop3.value == 77); + + const gop4 = try map.getOrPutValue(100, 41); + testing.expect(gop4.value == 41); + + testing.expect(map.contains(2)); + testing.expect(map.getEntry(2).?.value == 22); + testing.expect(map.get(2).? == 22); + + const rmv1 = map.remove(2); + testing.expect(rmv1.?.key == 2); + testing.expect(rmv1.?.value == 22); + testing.expect(map.remove(2) == null); + testing.expect(map.getEntry(2) == null); + testing.expect(map.get(2) == null); + + map.removeAssertDiscard(3); +} + +test "iterator hash map" { + // https://github.com/ziglang/zig/issues/5127 + if (std.Target.current.cpu.arch == .mips) return error.SkipZigTest; + + var reset_map = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + defer reset_map.deinit(); + + // test ensureCapacity with a 0 parameter + try reset_map.ensureCapacity(0); + + try reset_map.putNoClobber(0, 11); + try reset_map.putNoClobber(1, 22); + try reset_map.putNoClobber(2, 33); + + var keys = [_]i32{ + 0, 2, 1, + }; + + var values = [_]i32{ + 11, 33, 22, + }; + + var buffer = [_]i32{ + 0, 0, 0, + }; + + var it = reset_map.iterator(); + const first_entry = it.next().?; + it.reset(); + + var count: usize = 0; + while (it.next()) |entry| : (count += 1) { + buffer[@intCast(usize, entry.key)] = entry.value; + } + testing.expect(count == 3); + testing.expect(it.next() == null); + + for (buffer) |v, i| { + testing.expect(buffer[@intCast(usize, keys[i])] == values[i]); + } + + it.reset(); + count = 0; + while (it.next()) |entry| { + buffer[@intCast(usize, entry.key)] = entry.value; + count += 1; + if (count >= 2) break; + } + + for (buffer[0..2]) |v, i| { + testing.expect(buffer[@intCast(usize, keys[i])] == values[i]); + } + + it.reset(); + var entry = it.next().?; + testing.expect(entry.key == first_entry.key); + testing.expect(entry.value == first_entry.value); +} + +test "ensure capacity" { + var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + defer map.deinit(); + + try map.ensureCapacity(20); + const initial_capacity = map.capacity(); + testing.expect(initial_capacity >= 20); + var i: i32 = 0; + while (i < 20) : (i += 1) { + testing.expect(map.fetchPutAssumeCapacity(i, i + 10) == null); + } + // shouldn't resize from putAssumeCapacity + testing.expect(initial_capacity == map.capacity()); +} + +test "clone" { + var original = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + defer original.deinit(); + + // put more than `linear_scan_max` so we can test that the index header is properly cloned + var i: u8 = 0; + while (i < 10) : (i += 1) { + 
try original.putNoClobber(i, i * 10); + } + + var copy = try original.clone(); + defer copy.deinit(); + + i = 0; + while (i < 10) : (i += 1) { + testing.expect(copy.get(i).? == i * 10); + } +} + +pub fn getHashPtrAddrFn(comptime K: type) (fn (K) u32) { + return struct { + fn hash(key: K) u32 { + return getAutoHashFn(usize)(@ptrToInt(key)); + } + }.hash; +} + +pub fn getTrivialEqlFn(comptime K: type) (fn (K, K) bool) { + return struct { + fn eql(a: K, b: K) bool { + return a == b; + } + }.eql; +} + +pub fn getAutoHashFn(comptime K: type) (fn (K) u32) { + return struct { + fn hash(key: K) u32 { + if (comptime trait.hasUniqueRepresentation(K)) { + return @truncate(u32, Wyhash.hash(0, std.mem.asBytes(&key))); + } else { + var hasher = Wyhash.init(0); + autoHash(&hasher, key); + return @truncate(u32, hasher.final()); + } + } + }.hash; +} + +pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) { + return struct { + fn eql(a: K, b: K) bool { + return meta.eql(a, b); + } + }.eql; +} + +pub fn autoEqlIsCheap(comptime K: type) bool { + return switch (@typeInfo(K)) { + .Bool, + .Int, + .Float, + .Pointer, + .ComptimeFloat, + .ComptimeInt, + .Enum, + .Fn, + .ErrorSet, + .AnyFrame, + .EnumLiteral, + => true, + else => false, + }; +} + +pub fn getAutoHashStratFn(comptime K: type, comptime strategy: std.hash.Strategy) (fn (K) u32) { + return struct { + fn hash(key: K) u32 { + var hasher = Wyhash.init(0); + std.hash.autoHashStrat(&hasher, key, strategy); + return @truncate(u32, hasher.final()); + } + }.hash; +} |
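
The doc comments in this file describe an insertion-order-preserving hash map whose deletions perform a swap removal on the entries list. As a minimal usage sketch (not part of this commit, and assuming the new type is re-exported from `std.zig` as `std.AutoArrayHashMap`), the managed wrapper behaves like this:

```zig
const std = @import("std");

test "AutoArrayHashMap preserves insertion order" {
    // Hypothetical sketch, not part of the commit.
    var map = std.AutoArrayHashMap(u32, []const u8).init(std.testing.allocator);
    defer map.deinit();

    try map.put(3, "three");
    try map.put(1, "one");
    try map.put(2, "two");

    // items() exposes the entries array directly; iteration order is
    // insertion order, which is the main difference from std.HashMap.
    const entries = map.items();
    std.testing.expect(entries.len == 3);
    std.testing.expect(entries[0].key == 3);
    std.testing.expect(entries[1].key == 1);
    std.testing.expect(entries[2].key == 2);

    // Swap removal: deleting the first entry moves the last one into its slot.
    _ = map.remove(3);
    std.testing.expect(map.items()[0].key == 2);
}
```

The final assertion relies on the documented swap-removal behavior: insertion order is only preserved in the absence of deletions.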
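`getOrPut` is the primitive the other `put*` functions build on: it either returns the existing entry or reserves a new one whose value the caller must then initialize. A hedged sketch of the usual counting idiom, again assuming the `std.StringArrayHashMap` re-export:

```zig
const std = @import("std");

test "getOrPut counts words in one lookup per key" {
    // Hypothetical sketch, not part of the commit. Keys are string literals,
    // so nothing needs to be duplicated; the map stores the slices as-is.
    var counts = std.StringArrayHashMap(usize).init(std.testing.allocator);
    defer counts.deinit();

    const words = [_][]const u8{ "zig", "map", "zig" };
    for (words) |word| {
        const gop = try counts.getOrPut(word);
        // A fresh entry has an undefined value; the caller initializes it.
        if (!gop.found_existing) gop.entry.value = 0;
        gop.entry.value += 1;
    }

    std.testing.expect(counts.get("zig").? == 2);
    std.testing.expect(counts.get("map").? == 1);
}
```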
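For the unmanaged variant, the doc comment notes that no `Allocator` field is stored, that the map can be initialized directly from its default field values, and that every allocating call takes the allocator explicitly. A sketch under the same re-export assumption (`std.AutoArrayHashMapUnmanaged`):

```zig
const std = @import("std");

test "AutoArrayHashMapUnmanaged takes the allocator per call" {
    // Hypothetical sketch, not part of the commit. The unmanaged map is
    // initialized from its default field values and stores no Allocator.
    const allocator = std.testing.allocator;
    var map: std.AutoArrayHashMapUnmanaged(u32, u32) = .{};
    defer map.deinit(allocator);

    // Per the ensureCapacity doc comment, insertions up to the reserved
    // count cannot fail, so the *AssumeCapacity variants are safe here.
    try map.ensureCapacity(allocator, 16);
    var i: u32 = 0;
    while (i < 16) : (i += 1) {
        map.putAssumeCapacity(i, i * 2);
    }

    std.testing.expect(map.items().len == 16);
    std.testing.expect(map.get(7).? == 14);
}
```

Reserving capacity up front is also what grows the index header past the `linear_scan_max` threshold of 8 entries, so the loop exercises the indexed path rather than the linear scan.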
