diff options
| author | Francesco Alemanno <50984334+francescoalemanno@users.noreply.github.com> | 2024-10-30 14:14:12 +0100 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2024-11-24 15:27:03 -0800 |
| commit | aee6f7d7eed535597e4b506132e211b9dce311dd (patch) | |
| tree | dafd5dd769f1f0bf3bce341f79b63c596654dceb /lib/std/hash.zig | |
| parent | e2f24a2d7096e4a28ba74513ed9473da0b7fb372 (diff) | |
| download | zig-aee6f7d7eed535597e4b506132e211b9dce311dd.tar.gz zig-aee6f7d7eed535597e4b506132e211b9dce311dd.zip | |
std.hash: improve simple hashing of unsigned integers
Before, the default bit mixer was very biased, and after a
lot of searching it turns out that selecting a better solution is hard.
I wrote a custom statistical analysis taylored for bit mixers in order
to select the best one at each size (u64/u32/u16), compared a lot of
mixers, and packaged the best ones in this commit.
Diffstat (limited to 'lib/std/hash.zig')
| -rw-r--r-- | lib/std/hash.zig | 73 |
1 files changed, 66 insertions, 7 deletions
diff --git a/lib/std/hash.zig b/lib/std/hash.zig index 061744ac97..68adb1afdd 100644 --- a/lib/std/hash.zig +++ b/lib/std/hash.zig @@ -37,20 +37,79 @@ pub const XxHash3 = xxhash.XxHash3; pub const XxHash64 = xxhash.XxHash64; pub const XxHash32 = xxhash.XxHash32; +/// Deprecated: use std.hash.int(comptime T, input T) T where T is an unsigned integer type. /// This is handy if you have a u32 and want a u32 and don't want to take a /// detour through many layers of abstraction elsewhere in the std.hash /// namespace. -/// Copied from https://nullprogram.com/blog/2018/07/31/ pub fn uint32(input: u32) u32 { - var x: u32 = input; - x ^= x >> 16; - x *%= 0x7feb352d; - x ^= x >> 15; - x *%= 0x846ca68b; - x ^= x >> 16; + return int(u32, input); +} + +/// Applies a bit-mangling transformation to an unsigned integer type `T`. +/// Optimized per type: for `u16` and `u32`, Skeeto's xorshift-multiply; for `u64`, Maiga's mx3. +/// Falls back on an avalanche pattern for other unsigned types, ensuring high entropy. +/// Only unsigned types are accepted; signed types will raise a compile-time error. +pub fn int(comptime T: type, input: T) T { + const tInfo = @typeInfo(T).int; + if (tInfo.signedness != .unsigned) @compileError("type has to be unsigned integer"); + var x = input; + switch (T) { + u16 => { + //https://github.com/skeeto/hash-prospector + // 3-round xorshift-multiply (-Xn3) + // bias = 0.0045976709018820602 + x = (x ^ (x >> 7)) *% 0x2993; + x = (x ^ (x >> 5)) *% 0xe877; + x = (x ^ (x >> 9)) *% 0x0235; + x = x ^ (x >> 10); + }, + u32 => { + // https://github.com/skeeto/hash-prospector + x = (x ^ (x >> 17)) *% 0xed5ad4bb; + x = (x ^ (x >> 11)) *% 0xac4c1b51; + x = (x ^ (x >> 15)) *% 0x31848bab; + x = x ^ (x >> 14); + }, + u64 => { + // https://github.com/jonmaiga/mx3 + // https://github.com/jonmaiga/mx3/blob/48924ee743d724aea2cafd2b4249ef8df57fa8b9/mx3.h#L17 + const C = 0xbea225f9eb34556d; + x = (x ^ (x >> 32)) *% C; + x = (x ^ (x >> 29)) *% C; + x = (x ^ (x >> 32)) *% C; + x = x ^ (x >> 29); + }, + else => { + // this construction provides robust avalanche properties, but it is not optimal for any given size. + const Tsize = @bitSizeOf(T); + if (Tsize < 4) @compileError("not implemented."); + const hsize = Tsize >> 1; + const C = comptime blk: { + const max = (1 << Tsize) - 1; + var mul = 1; + while (mul * 3 < max) mul *= 3; + break :blk ((mul ^ (mul >> hsize)) | 1); + }; + inline for (0..2) |_| { + x = (x ^ (x >> hsize + 1)) *% C; + x = (x ^ (x >> hsize - 1)) *% C; + } + x ^= (x >> hsize); + }, + } return x; } +test "bit manglers" { + const expect = @import("std").testing.expect; + try expect(int(u4, 1) == 0xC); + try expect(int(u8, 1) == 0x4F); + try expect(int(u16, 1) == 0x2880); + try expect(int(u32, 1) == 0x42741D6); + try expect(int(u64, 1) == 0x71894DE00D9981F); + try expect(int(u128, 1) == 0x50BC2BB18910C3DE0BAA2CE0D0C5B83E); +} + test { _ = adler; _ = auto_hash; |
