Merge pull request #19388 from ziglang/cache-dedup

cache system file deduplication
author: Andrew Kelley <andrew@ziglang.org> 2024-03-22 01:13:43 -0700
committer: GitHub <noreply@github.com> 2024-03-22 01:13:43 -0700
commit: a2651cbc829d44df4c3773037598b30e8cf0c4da (patch)
tree: 555c74b10683ae9678c68777310116f47142a8aa /lib/std/array_hash_map.zig
parent: 54c08579e4859673391843182aa2fd44aabbf6cf (diff)
parent: 950359071bca707dbc9763f1bf3ebc79cd52ebca (diff)
download: zig-a2651cbc829d44df4c3773037598b30e8cf0c4da.tar.gz
zig-a2651cbc829d44df4c3773037598b30e8cf0c4da.zip
1 files changed, 56 insertions, 58 deletions
diff --git a/lib/std/array_hash_map.zig b/lib/std/array_hash_map.zig
index d3bd1b2a04..ba086f8764 100644
--- a/lib/std/array_hash_map.zig
+++ b/lib/std/array_hash_map.zig
@@ -9,23 +9,26 @@ const Wyhash = std.hash.Wyhash;
 const Allocator = mem.Allocator;
 const hash_map = @This();
 
-/// An ArrayHashMap with default hash and equal functions.
-/// See AutoContext for a description of the hash and equal implementations.
+/// An `ArrayHashMap` with default hash and equal functions.
+///
+/// See `AutoContext` for a description of the hash and equal implementations.
 pub fn AutoArrayHashMap(comptime K: type, comptime V: type) type {
     return ArrayHashMap(K, V, AutoContext(K), !autoEqlIsCheap(K));
 }
 
-/// An ArrayHashMapUnmanaged with default hash and equal functions.
-/// See AutoContext for a description of the hash and equal implementations.
+/// An `ArrayHashMapUnmanaged` with default hash and equal functions.
+///
+/// See `AutoContext` for a description of the hash and equal implementations.
 pub fn AutoArrayHashMapUnmanaged(comptime K: type, comptime V: type) type {
     return ArrayHashMapUnmanaged(K, V, AutoContext(K), !autoEqlIsCheap(K));
 }
 
-/// Builtin hashmap for strings as keys.
+/// An `ArrayHashMap` with strings as keys.
 pub fn StringArrayHashMap(comptime V: type) type {
     return ArrayHashMap([]const u8, V, StringContext, true);
 }
 
+/// An `ArrayHashMapUnmanaged` with strings as keys.
 pub fn StringArrayHashMapUnmanaged(comptime V: type) type {
     return ArrayHashMapUnmanaged([]const u8, V, StringContext, true);
 }
@@ -50,29 +53,33 @@ pub fn hashString(s: []const u8) u32 {
     return @as(u32, @truncate(std.hash.Wyhash.hash(0, s)));
 }
 
-/// Insertion order is preserved.
-/// Deletions perform a "swap removal" on the entries list.
+/// A hash table of keys and values, each stored sequentially.
+///
+/// Insertion order is preserved. In general, this data structure supports the same
+/// operations as `std.ArrayList`.
+///
+/// Deletion operations:
+/// * `swapRemove` - O(1)
+/// * `orderedRemove` - O(N)
+///
 /// Modifying the hash map while iterating is allowed, however, one must understand
 /// the (well defined) behavior when mixing insertions and deletions with iteration.
-/// For a hash map that can be initialized directly that does not store an Allocator
-/// field, see `ArrayHashMapUnmanaged`.
-/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
-/// functions. It does not store each item's hash in the table. Setting `store_hash`
-/// to `true` incurs slightly more memory cost by storing each key's hash in the table
-/// but only has to call `eql` for hash collisions.
-/// If typical operations (except iteration over entries) need to be faster, prefer
-/// the alternative `std.HashMap`.
-/// Context must be a struct type with two member functions:
-///   hash(self, K) u32
-///   eql(self, K, K, usize) bool
-/// Adapted variants of many functions are provided.  These variants
-/// take a pseudo key instead of a key.  Their context must have the functions:
-///   hash(self, PseudoKey) u32
-///   eql(self, PseudoKey, K, usize) bool
+///
+/// See `ArrayHashMapUnmanaged` for a variant of this data structure that accepts an
+/// `Allocator` as a parameter when needed rather than storing it.
 pub fn ArrayHashMap(
     comptime K: type,
     comptime V: type,
+    /// A namespace that provides these two functions:
+    /// * `pub fn hash(self, K) u32`
+    /// * `pub fn eql(self, K, K) bool`
+    ///
     comptime Context: type,
+    /// When `false`, this data structure is biased towards cheap `eql`
+    /// functions and avoids storing each key's hash in the table. Setting
+    /// `store_hash` to `true` incurs more memory cost but limits `eql` to
+    /// being called only once per insertion/deletion (provided there are no
+    /// hash collisions).
     comptime store_hash: bool,
 ) type {
     return struct {
@@ -472,34 +479,40 @@ pub fn ArrayHashMap(
     };
 }
 
-/// General purpose hash table.
-/// Insertion order is preserved.
-/// Deletions perform a "swap removal" on the entries list.
+/// A hash table of keys and values, each stored sequentially.
+///
+/// Insertion order is preserved. In general, this data structure supports the same
+/// operations as `std.ArrayListUnmanaged`.
+///
+/// Deletion operations:
+/// * `swapRemove` - O(1)
+/// * `orderedRemove` - O(N)
+///
 /// Modifying the hash map while iterating is allowed, however, one must understand
 /// the (well defined) behavior when mixing insertions and deletions with iteration.
-/// This type does not store an Allocator field - the Allocator must be passed in
+///
+/// This type does not store an `Allocator` field - the `Allocator` must be passed in
 /// with each function call that requires it. See `ArrayHashMap` for a type that stores
-/// an Allocator field for convenience.
+/// an `Allocator` field for convenience.
+///
 /// Can be initialized directly using the default field values.
+///
 /// This type is designed to have low overhead for small numbers of entries. When
 /// `store_hash` is `false` and the number of entries in the map is less than 9,
 /// the overhead cost of using `ArrayHashMapUnmanaged` rather than `std.ArrayList` is
 /// only a single pointer-sized integer.
-/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
-/// functions. It does not store each item's hash in the table. Setting `store_hash`
-/// to `true` incurs slightly more memory cost by storing each key's hash in the table
-/// but guarantees only one call to `eql` per insertion/deletion.
-/// Context must be a struct type with two member functions:
-///   hash(self, K) u32
-///   eql(self, K, K) bool
-/// Adapted variants of many functions are provided.  These variants
-/// take a pseudo key instead of a key.  Their context must have the functions:
-///   hash(self, PseudoKey) u32
-///   eql(self, PseudoKey, K) bool
 pub fn ArrayHashMapUnmanaged(
     comptime K: type,
     comptime V: type,
+    /// A namespace that provides these two functions:
+    /// * `pub fn hash(self, K) u32`
+    /// * `pub fn eql(self, K, K) bool`
     comptime Context: type,
+    /// When `false`, this data structure is biased towards cheap `eql`
+    /// functions and avoids storing each key's hash in the table. Setting
+    /// `store_hash` to `true` incurs more memory cost but limits `eql` to
+    /// being called only once per insertion/deletion (provided there are no
+    /// hash collisions).
     comptime store_hash: bool,
 ) type {
     return struct {
@@ -516,10 +529,6 @@ pub fn ArrayHashMapUnmanaged(
         /// Used to detect memory safety violations.
         pointer_stability: std.debug.SafetyLock = .{},
 
-        comptime {
-            std.hash_map.verifyContext(Context, K, K, u32, true);
-        }
-
         /// Modifying the key is allowed only if it does not change the hash.
         /// Modifying the value is allowed.
         /// Entry pointers become invalid whenever this ArrayHashMap is modified,
@@ -1834,27 +1843,16 @@ pub fn ArrayHashMapUnmanaged(
             }
         }
 
-        inline fn checkedHash(ctx: anytype, key: anytype) u32 {
-            comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(key), K, u32, true);
+        fn checkedHash(ctx: anytype, key: anytype) u32 {
             // If you get a compile error on the next line, it means that your
             // generic hash function doesn't accept your key.
-            const hash = ctx.hash(key);
-            if (@TypeOf(hash) != u32) {
-                @compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic hash function that returns the wrong type!\n" ++
-                    @typeName(u32) ++ " was expected, but found " ++ @typeName(@TypeOf(hash)));
-            }
-            return hash;
+            return ctx.hash(key);
         }
-        inline fn checkedEql(ctx: anytype, a: anytype, b: K, b_index: usize) bool {
-            comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(a), K, u32, true);
+
+        fn checkedEql(ctx: anytype, a: anytype, b: K, b_index: usize) bool {
             // If you get a compile error on the next line, it means that your
             // generic eql function doesn't accept (self, adapt key, K, index).
-            const eql = ctx.eql(a, b, b_index);
-            if (@TypeOf(eql) != bool) {
-                @compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic eql function that returns the wrong type!\n" ++
-                    @typeName(bool) ++ " was expected, but found " ++ @typeName(@TypeOf(eql)));
-            }
-            return eql;
+            return ctx.eql(a, b, b_index);
         }
 
         fn dumpState(self: Self, comptime keyFmt: []const u8, comptime valueFmt: []const u8) void {
author	Andrew Kelley <andrew@ziglang.org>	2024-03-22 01:13:43 -0700
committer	GitHub <noreply@github.com>	2024-03-22 01:13:43 -0700
commit	a2651cbc829d44df4c3773037598b30e8cf0c4da (patch)
tree	555c74b10683ae9678c68777310116f47142a8aa /lib/std/array_hash_map.zig
parent	54c08579e4859673391843182aa2fd44aabbf6cf (diff)
parent	950359071bca707dbc9763f1bf3ebc79cd52ebca (diff)
download	zig-a2651cbc829d44df4c3773037598b30e8cf0c4da.tar.gz zig-a2651cbc829d44df4c3773037598b30e8cf0c4da.zip