diff options
Diffstat (limited to 'lib/std/hash/murmur.zig')
| -rw-r--r-- | lib/std/hash/murmur.zig | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/lib/std/hash/murmur.zig b/lib/std/hash/murmur.zig new file mode 100644 index 0000000000..a0c8f91338 --- /dev/null +++ b/lib/std/hash/murmur.zig @@ -0,0 +1,345 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const testing = std.testing; + +const default_seed: u32 = 0xc70f6907; + +pub const Murmur2_32 = struct { + const Self = @This(); + + pub fn hash(str: []const u8) u32 { + return @inlineCall(Self.hashWithSeed, str, default_seed); + } + + pub fn hashWithSeed(str: []const u8, seed: u32) u32 { + const m: u32 = 0x5bd1e995; + const len = @truncate(u32, str.len); + var h1: u32 = seed ^ len; + for (@ptrCast([*]allowzero align(1) const u32, str.ptr)[0..(len >> 2)]) |v| { + var k1: u32 = v; + if (builtin.endian == builtin.Endian.Big) + k1 = @byteSwap(u32, k1); + k1 *%= m; + k1 ^= k1 >> 24; + k1 *%= m; + h1 *%= m; + h1 ^= k1; + } + const offset = len & 0xfffffffc; + const rest = len & 3; + if (rest >= 3) { + h1 ^= @intCast(u32, str[offset + 2]) << 16; + } + if (rest >= 2) { + h1 ^= @intCast(u32, str[offset + 1]) << 8; + } + if (rest >= 1) { + h1 ^= @intCast(u32, str[offset + 0]); + h1 *%= m; + } + h1 ^= h1 >> 13; + h1 *%= m; + h1 ^= h1 >> 15; + return h1; + } + + pub fn hashUint32(v: u32) u32 { + return @inlineCall(Self.hashUint32WithSeed, v, default_seed); + } + + pub fn hashUint32WithSeed(v: u32, seed: u32) u32 { + const m: u32 = 0x5bd1e995; + const len: u32 = 4; + var h1: u32 = seed ^ len; + var k1: u32 = undefined; + k1 = v *% m; + k1 ^= k1 >> 24; + k1 *%= m; + h1 *%= m; + h1 ^= k1; + h1 ^= h1 >> 13; + h1 *%= m; + h1 ^= h1 >> 15; + return h1; + } + + pub fn hashUint64(v: u64) u32 { + return @inlineCall(Self.hashUint64WithSeed, v, default_seed); + } + + pub fn hashUint64WithSeed(v: u64, seed: u32) u32 { + const m: u32 = 0x5bd1e995; + const len: u32 = 8; + var h1: u32 = seed ^ len; + var k1: u32 = undefined; + k1 = @truncate(u32, v) *% m; + k1 ^= k1 >> 24; + k1 *%= m; + h1 *%= m; + h1 ^= k1; + k1 = @truncate(u32, v >> 32) *% m; + k1 ^= k1 >> 24; + k1 *%= m; + h1 *%= m; + h1 ^= k1; + h1 ^= h1 >> 13; + h1 *%= m; + h1 ^= h1 >> 15; + return h1; + } +}; + +pub const Murmur2_64 = struct { + const Self = @This(); + + pub fn hash(str: []const u8) u64 { + return @inlineCall(Self.hashWithSeed, str, default_seed); + } + + pub fn hashWithSeed(str: []const u8, seed: u64) u64 { + const m: u64 = 0xc6a4a7935bd1e995; + const len = u64(str.len); + var h1: u64 = seed ^ (len *% m); + for (@ptrCast([*]allowzero align(1) const u64, str.ptr)[0..@intCast(usize, len >> 3)]) |v| { + var k1: u64 = v; + if (builtin.endian == builtin.Endian.Big) + k1 = @byteSwap(u64, k1); + k1 *%= m; + k1 ^= k1 >> 47; + k1 *%= m; + h1 ^= k1; + h1 *%= m; + } + const rest = len & 7; + const offset = len - rest; + if (rest > 0) { + var k1: u64 = 0; + @memcpy(@ptrCast([*]u8, &k1), @ptrCast([*]const u8, &str[@intCast(usize, offset)]), @intCast(usize, rest)); + if (builtin.endian == builtin.Endian.Big) + k1 = @byteSwap(u64, k1); + h1 ^= k1; + h1 *%= m; + } + h1 ^= h1 >> 47; + h1 *%= m; + h1 ^= h1 >> 47; + return h1; + } + + pub fn hashUint32(v: u32) u64 { + return @inlineCall(Self.hashUint32WithSeed, v, default_seed); + } + + pub fn hashUint32WithSeed(v: u32, seed: u32) u64 { + const m: u64 = 0xc6a4a7935bd1e995; + const len: u64 = 4; + var h1: u64 = seed ^ (len *% m); + var k1: u64 = v; + h1 ^= k1; + h1 *%= m; + h1 ^= h1 >> 47; + h1 *%= m; + h1 ^= h1 >> 47; + return h1; + } + + pub fn hashUint64(v: u64) u64 { + return @inlineCall(Self.hashUint64WithSeed, v, default_seed); + } + + pub fn hashUint64WithSeed(v: u64, seed: u32) u64 { + const m: u64 = 0xc6a4a7935bd1e995; + const len: u64 = 8; + var h1: u64 = seed ^ (len *% m); + var k1: u64 = undefined; + k1 = v *% m; + k1 ^= k1 >> 47; + k1 *%= m; + h1 ^= k1; + h1 *%= m; + h1 ^= h1 >> 47; + h1 *%= m; + h1 ^= h1 >> 47; + return h1; + } +}; + +pub const Murmur3_32 = struct { + const Self = @This(); + + fn rotl32(x: u32, comptime r: u32) u32 { + return (x << r) | (x >> (32 - r)); + } + + pub fn hash(str: []const u8) u32 { + return @inlineCall(Self.hashWithSeed, str, default_seed); + } + + pub fn hashWithSeed(str: []const u8, seed: u32) u32 { + const c1: u32 = 0xcc9e2d51; + const c2: u32 = 0x1b873593; + const len = @truncate(u32, str.len); + var h1: u32 = seed; + for (@ptrCast([*]allowzero align(1) const u32, str.ptr)[0..(len >> 2)]) |v| { + var k1: u32 = v; + if (builtin.endian == builtin.Endian.Big) + k1 = @byteSwap(u32, k1); + k1 *%= c1; + k1 = rotl32(k1, 15); + k1 *%= c2; + h1 ^= k1; + h1 = rotl32(h1, 13); + h1 *%= 5; + h1 +%= 0xe6546b64; + } + { + var k1: u32 = 0; + const offset = len & 0xfffffffc; + const rest = len & 3; + if (rest == 3) { + k1 ^= @intCast(u32, str[offset + 2]) << 16; + } + if (rest >= 2) { + k1 ^= @intCast(u32, str[offset + 1]) << 8; + } + if (rest >= 1) { + k1 ^= @intCast(u32, str[offset + 0]); + k1 *%= c1; + k1 = rotl32(k1, 15); + k1 *%= c2; + h1 ^= k1; + } + } + h1 ^= len; + h1 ^= h1 >> 16; + h1 *%= 0x85ebca6b; + h1 ^= h1 >> 13; + h1 *%= 0xc2b2ae35; + h1 ^= h1 >> 16; + return h1; + } + + pub fn hashUint32(v: u32) u32 { + return @inlineCall(Self.hashUint32WithSeed, v, default_seed); + } + + pub fn hashUint32WithSeed(v: u32, seed: u32) u32 { + const c1: u32 = 0xcc9e2d51; + const c2: u32 = 0x1b873593; + const len: u32 = 4; + var h1: u32 = seed; + var k1: u32 = undefined; + k1 = v *% c1; + k1 = rotl32(k1, 15); + k1 *%= c2; + h1 ^= k1; + h1 = rotl32(h1, 13); + h1 *%= 5; + h1 +%= 0xe6546b64; + h1 ^= len; + h1 ^= h1 >> 16; + h1 *%= 0x85ebca6b; + h1 ^= h1 >> 13; + h1 *%= 0xc2b2ae35; + h1 ^= h1 >> 16; + return h1; + } + + pub fn hashUint64(v: u64) u32 { + return @inlineCall(Self.hashUint64WithSeed, v, default_seed); + } + + pub fn hashUint64WithSeed(v: u64, seed: u32) u32 { + const c1: u32 = 0xcc9e2d51; + const c2: u32 = 0x1b873593; + const len: u32 = 8; + var h1: u32 = seed; + var k1: u32 = undefined; + k1 = @truncate(u32, v) *% c1; + k1 = rotl32(k1, 15); + k1 *%= c2; + h1 ^= k1; + h1 = rotl32(h1, 13); + h1 *%= 5; + h1 +%= 0xe6546b64; + k1 = @truncate(u32, v >> 32) *% c1; + k1 = rotl32(k1, 15); + k1 *%= c2; + h1 ^= k1; + h1 = rotl32(h1, 13); + h1 *%= 5; + h1 +%= 0xe6546b64; + h1 ^= len; + h1 ^= h1 >> 16; + h1 *%= 0x85ebca6b; + h1 ^= h1 >> 13; + h1 *%= 0xc2b2ae35; + h1 ^= h1 >> 16; + return h1; + } +}; + +fn SMHasherTest(comptime hash_fn: var, comptime hashbits: u32) u32 { + const hashbytes = hashbits / 8; + var key: [256]u8 = undefined; + var hashes: [hashbytes * 256]u8 = undefined; + var final: [hashbytes]u8 = undefined; + + @memset(@ptrCast([*]u8, &key[0]), 0, @sizeOf(@typeOf(key))); + @memset(@ptrCast([*]u8, &hashes[0]), 0, @sizeOf(@typeOf(hashes))); + @memset(@ptrCast([*]u8, &final[0]), 0, @sizeOf(@typeOf(final))); + + var i: u32 = 0; + while (i < 256) : (i += 1) { + key[i] = @truncate(u8, i); + + var h = hash_fn(key[0..i], 256 - i); + if (builtin.endian == builtin.Endian.Big) + h = @byteSwap(@typeOf(h), h); + @memcpy(@ptrCast([*]u8, &hashes[i * hashbytes]), @ptrCast([*]u8, &h), hashbytes); + } + + return @truncate(u32, hash_fn(hashes, 0)); +} + +test "murmur2_32" { + testing.expectEqual(SMHasherTest(Murmur2_32.hashWithSeed, 32), 0x27864C1E); + var v0: u32 = 0x12345678; + var v1: u64 = 0x1234567812345678; + var v0le: u32 = v0; + var v1le: u64 = v1; + if (builtin.endian == builtin.Endian.Big) { + v0le = @byteSwap(u32, v0le); + v1le = @byteSwap(u64, v1le); + } + testing.expectEqual(Murmur2_32.hash(@ptrCast([*]u8, &v0le)[0..4]), Murmur2_32.hashUint32(v0)); + testing.expectEqual(Murmur2_32.hash(@ptrCast([*]u8, &v1le)[0..8]), Murmur2_32.hashUint64(v1)); +} + +test "murmur2_64" { + std.testing.expectEqual(SMHasherTest(Murmur2_64.hashWithSeed, 64), 0x1F0D3804); + var v0: u32 = 0x12345678; + var v1: u64 = 0x1234567812345678; + var v0le: u32 = v0; + var v1le: u64 = v1; + if (builtin.endian == builtin.Endian.Big) { + v0le = @byteSwap(u32, v0le); + v1le = @byteSwap(u64, v1le); + } + testing.expectEqual(Murmur2_64.hash(@ptrCast([*]u8, &v0le)[0..4]), Murmur2_64.hashUint32(v0)); + testing.expectEqual(Murmur2_64.hash(@ptrCast([*]u8, &v1le)[0..8]), Murmur2_64.hashUint64(v1)); +} + +test "murmur3_32" { + std.testing.expectEqual(SMHasherTest(Murmur3_32.hashWithSeed, 32), 0xB0F57EE3); + var v0: u32 = 0x12345678; + var v1: u64 = 0x1234567812345678; + var v0le: u32 = v0; + var v1le: u64 = v1; + if (builtin.endian == builtin.Endian.Big) { + v0le = @byteSwap(u32, v0le); + v1le = @byteSwap(u64, v1le); + } + testing.expectEqual(Murmur3_32.hash(@ptrCast([*]u8, &v0le)[0..4]), Murmur3_32.hashUint32(v0)); + testing.expectEqual(Murmur3_32.hash(@ptrCast([*]u8, &v1le)[0..8]), Murmur3_32.hashUint64(v1)); +} |
