author    Andrew Kelley <andrew@ziglang.org>    2019-09-26 01:54:45 -0400
committer GitHub <noreply@github.com>           2019-09-26 01:54:45 -0400
commit    68bb3945708c43109c48bda3664176307d45b62c (patch)
tree      afb9731e10cef9d192560b52cd9ae2cf179775c4
parent    6128bc728d1e1024a178c16c2149f5b1a167a013 (diff)
parent    4637e8f9699af9c3c6cf4df50ef5bb67c7a318a4 (diff)
Merge pull request #3315 from ziglang/mv-std-lib
Move std/ to lib/std/
Diffstat (limited to 'lib/std/hash')
-rw-r--r--   lib/std/hash/adler.zig       107
-rw-r--r--   lib/std/hash/auto_hash.zig   381
-rw-r--r--   lib/std/hash/benchmark.zig   273
-rw-r--r--   lib/std/hash/cityhash.zig    387
-rw-r--r--   lib/std/hash/crc.zig         180
-rw-r--r--   lib/std/hash/fnv.zig          58
-rw-r--r--   lib/std/hash/murmur.zig      345
-rw-r--r--   lib/std/hash/siphash.zig     383
-rw-r--r--   lib/std/hash/wyhash.zig      231
9 files changed, 2345 insertions, 0 deletions
diff --git a/lib/std/hash/adler.zig b/lib/std/hash/adler.zig
new file mode 100644
index 0000000000..3cc3171e49
--- /dev/null
+++ b/lib/std/hash/adler.zig
@@ -0,0 +1,107 @@
+// Adler32 checksum.
+//
+// https://tools.ietf.org/html/rfc1950#section-9
+// https://github.com/madler/zlib/blob/master/adler32.c
+
+const std = @import("../std.zig");
+const testing = std.testing;
+
+pub const Adler32 = struct {
+ const base = 65521;
+ const nmax = 5552;
+
+ adler: u32,
+
+ pub fn init() Adler32 {
+ return Adler32{ .adler = 1 };
+ }
+
+ // This fast variant is taken from zlib. It reduces the number of required modulo
+ // operations and unrolls the loop for longer inputs, making it much quicker.
+ pub fn update(self: *Adler32, input: []const u8) void {
+ var s1 = self.adler & 0xffff;
+ var s2 = (self.adler >> 16) & 0xffff;
+
+ if (input.len == 1) {
+ s1 +%= input[0];
+ if (s1 >= base) {
+ s1 -= base;
+ }
+ s2 +%= s1;
+ if (s2 >= base) {
+ s2 -= base;
+ }
+ } else if (input.len < 16) {
+ for (input) |b| {
+ s1 +%= b;
+ s2 +%= s1;
+ }
+ if (s1 >= base) {
+ s1 -= base;
+ }
+
+ s2 %= base;
+ } else {
+ var i: usize = 0;
+ while (i + nmax <= input.len) : (i += nmax) {
+ const n = nmax / 16; // note: 16 | nmax
+
+ var rounds: usize = 0;
+ while (rounds < n) : (rounds += 1) {
+ comptime var j: usize = 0;
+ inline while (j < 16) : (j += 1) {
+ s1 +%= input[i + 16 * rounds + j];
+ s2 +%= s1;
+ }
+ }
+
+ // nmax is the largest block length for which s2 cannot overflow a u32,
+ // so reducing once per block (rather than once per byte) is sufficient.
+ s1 %= base;
+ s2 %= base;
+ }
+
+ if (i < input.len) {
+ while (i + 16 <= input.len) : (i += 16) {
+ comptime var j: usize = 0;
+ inline while (j < 16) : (j += 1) {
+ s1 +%= input[i + j];
+ s2 +%= s1;
+ }
+ }
+ while (i < input.len) : (i += 1) {
+ s1 +%= input[i];
+ s2 +%= s1;
+ }
+
+ s1 %= base;
+ s2 %= base;
+ }
+ }
+
+ self.adler = s1 | (s2 << 16);
+ }
+
+ pub fn final(self: *Adler32) u32 {
+ return self.adler;
+ }
+
+ pub fn hash(input: []const u8) u32 {
+ var c = Adler32.init();
+ c.update(input);
+ return c.final();
+ }
+};
+
+test "adler32 sanity" {
+ testing.expect(Adler32.hash("a") == 0x620062);
+ testing.expect(Adler32.hash("example") == 0xbc002ed);
+}
+
+test "adler32 long" {
+ const long1 = [_]u8{1} ** 1024;
+ testing.expect(Adler32.hash(long1[0..]) == 0x06780401);
+
+ const long2 = [_]u8{1} ** 1025;
+ testing.expect(Adler32.hash(long2[0..]) == 0x0a7a0402);
+}
+
+test "adler32 very long" {
+ const long = [_]u8{1} ** 5553;
+ testing.expect(Adler32.hash(long[0..]) == 0x707f15b2);
+}
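
As a usage sketch (illustrative, not part of the commit): the streaming API and the one-shot helper are interchangeable, so input may be fed in arbitrary chunks.

    const std = @import("std");
    const Adler32 = std.hash.Adler32;

    test "adler32 chunked vs one-shot (illustrative)" {
        // Splitting the input across update() calls must not change the checksum.
        var h = Adler32.init();
        h.update("hello ");
        h.update("world");
        std.testing.expect(h.final() == Adler32.hash("hello world"));
    }
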
diff --git a/lib/std/hash/auto_hash.zig b/lib/std/hash/auto_hash.zig
new file mode 100644
index 0000000000..8a22788e5c
--- /dev/null
+++ b/lib/std/hash/auto_hash.zig
@@ -0,0 +1,381 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const mem = std.mem;
+const meta = std.meta;
+
+/// Describes how pointer types should be hashed.
+pub const HashStrategy = enum {
+ /// Do not follow pointers; hash only the pointer value (the address) itself.
+ Shallow,
+
+ /// Follow pointers, hash the pointee content.
+ /// Only dereferences one level, i.e. the strategy changes to .Shallow when a
+ /// pointer type is encountered.
+ Deep,
+
+ /// Follow pointers, hash the pointee content.
+ /// Dereferences all pointers encountered.
+ /// Assumes there are no cycles.
+ DeepRecursive,
+};
+
+/// Helper function to hash a pointer and mutate the strategy if needed.
+pub fn hashPointer(hasher: var, key: var, comptime strat: HashStrategy) void {
+ const info = @typeInfo(@typeOf(key));
+
+ switch (info.Pointer.size) {
+ builtin.TypeInfo.Pointer.Size.One => switch (strat) {
+ .Shallow => hash(hasher, @ptrToInt(key), .Shallow),
+ .Deep => hash(hasher, key.*, .Shallow),
+ .DeepRecursive => hash(hasher, key.*, .DeepRecursive),
+ },
+
+ builtin.TypeInfo.Pointer.Size.Slice => switch (strat) {
+ .Shallow => {
+ hashPointer(hasher, key.ptr, .Shallow);
+ hash(hasher, key.len, .Shallow);
+ },
+ .Deep => hashArray(hasher, key, .Shallow),
+ .DeepRecursive => hashArray(hasher, key, .DeepRecursive),
+ },
+
+ builtin.TypeInfo.Pointer.Size.Many,
+ builtin.TypeInfo.Pointer.Size.C,
+ => switch (strat) {
+ .Shallow => hash(hasher, @ptrToInt(key), .Shallow),
+ else => @compileError(
+ \\ unknown-length pointers and C pointers cannot be hashed deeply.
+ \\ Consider providing your own hash function.
+ ),
+ },
+ }
+}
+
+/// Helper function to hash a set of contiguous objects, from an array or slice.
+pub fn hashArray(hasher: var, key: var, comptime strat: HashStrategy) void {
+ switch (strat) {
+ .Shallow => {
+ // TODO detect via a trait when Key has no padding bits to
+ // hash it as an array of bytes.
+ // Otherwise, hash every element.
+ for (key) |element| {
+ hash(hasher, element, .Shallow);
+ }
+ },
+ else => {
+ for (key) |element| {
+ hash(hasher, element, strat);
+ }
+ },
+ }
+}
+
+/// Provides generic hashing for any eligible type.
+/// Strategy is provided to determine if pointers should be followed or not.
+pub fn hash(hasher: var, key: var, comptime strat: HashStrategy) void {
+ const Key = @typeOf(key);
+ switch (@typeInfo(Key)) {
+ .NoReturn,
+ .Opaque,
+ .Undefined,
+ .ArgTuple,
+ .Void,
+ .Null,
+ .BoundFn,
+ .ComptimeFloat,
+ .ComptimeInt,
+ .Type,
+ .EnumLiteral,
+ .Frame,
+ => @compileError("cannot hash this type"),
+
+ // Help the optimizer see that hashing an int is easy by inlining!
+ // TODO Check if the situation is better after #561 is resolved.
+ .Int => @inlineCall(hasher.update, std.mem.asBytes(&key)),
+
+ .Float => |info| hash(hasher, @bitCast(@IntType(false, info.bits), key), strat),
+
+ .Bool => hash(hasher, @boolToInt(key), strat),
+ .Enum => hash(hasher, @enumToInt(key), strat),
+ .ErrorSet => hash(hasher, @errorToInt(key), strat),
+ .AnyFrame, .Fn => hash(hasher, @ptrToInt(key), strat),
+
+ .Pointer => @inlineCall(hashPointer, hasher, key, strat),
+
+ .Optional => if (key) |k| hash(hasher, k, strat),
+
+ .Array => hashArray(hasher, key, strat),
+
+ .Vector => |info| {
+ if (info.child.bit_count % 8 == 0) {
+ // If there are no unused bits in the child type, we can just hash
+ // this as an array of bytes.
+ hasher.update(mem.asBytes(&key));
+ } else {
+ // Otherwise, hash every element.
+ // TODO remove the copy to an array once field access is done.
+ const array: [info.len]info.child = key;
+ comptime var i = 0;
+ inline while (i < info.len) : (i += 1) {
+ hash(hasher, array[i], strat);
+ }
+ }
+ },
+
+ .Struct => |info| {
+ // TODO detect via a trait when Key has no padding bits to
+ // hash it as an array of bytes.
+ // Otherwise, hash every field.
+ inline for (info.fields) |field| {
+ // We reuse the hash of the previous field as the seed for the
+ // next one so that they're dependent.
+ hash(hasher, @field(key, field.name), strat);
+ }
+ },
+
+ .Union => |info| blk: {
+ if (info.tag_type) |tag_type| {
+ const tag = meta.activeTag(key);
+ const s = hash(hasher, tag, strat);
+ inline for (info.fields) |field| {
+ const enum_field = field.enum_field.?;
+ if (enum_field.value == @enumToInt(tag)) {
+ hash(hasher, @field(key, enum_field.name), strat);
+ // TODO use a labelled break when it does not crash the compiler. cf #2908
+ // break :blk;
+ return;
+ }
+ }
+ unreachable;
+ } else @compileError("cannot hash untagged union type: " ++ @typeName(Key) ++ ", provide your own hash function");
+ },
+
+ .ErrorUnion => blk: {
+ const payload = key catch |err| {
+ hash(hasher, err, strat);
+ break :blk;
+ };
+ hash(hasher, payload, strat);
+ },
+ }
+}
+
+/// Provides generic hashing for any eligible type.
+/// Only hashes `key` itself, pointers are not followed.
+/// Slices are rejected to avoid ambiguity on the user's intention.
+pub fn autoHash(hasher: var, key: var) void {
+ const Key = @typeOf(key);
+ if (comptime meta.trait.isSlice(Key)) {
+ comptime assert(@hasDecl(std, "StringHashMap")); // detect when the following message needs updating
+ const extra_help = if (Key == []const u8)
+ " Consider std.StringHashMap for hashing the contents of []const u8."
+ else
+ "";
+
+ @compileError("std.auto_hash.autoHash does not allow slices (here " ++ @typeName(Key) ++
+ ") because the intent is unclear. Consider using std.auto_hash.hash or providing your own hash function instead." ++
+ extra_help);
+ }
+
+ hash(hasher, key, .Shallow);
+}
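
An illustrative sketch (not part of the commit, assuming auto_hash.zig is imported directly) of how the strategies differ for a pointer: .Deep hashes the pointee, so it agrees with hashing the value itself, while .Shallow mixes in the address instead.

    const std = @import("std");
    const auto_hash = @import("auto_hash.zig");

    test "strategy difference for pointers (illustrative)" {
        const x: u32 = 42;

        var deep = std.hash.Wyhash.init(0);
        auto_hash.hash(&deep, &x, .Deep); // follows the pointer: hashes 42

        var value = std.hash.Wyhash.init(0);
        auto_hash.hash(&value, x, .Shallow); // hashes the integer directly

        var shallow = std.hash.Wyhash.init(0);
        auto_hash.hash(&shallow, &x, .Shallow); // hashes @ptrToInt(&x) instead

        std.testing.expect(deep.final() == value.final());
        std.testing.expect(shallow.final() != value.final());
    }
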
+
+const testing = std.testing;
+const Wyhash = std.hash.Wyhash;
+
+fn testHash(key: var) u64 {
+ // Any hash could be used here, for testing autoHash.
+ var hasher = Wyhash.init(0);
+ hash(&hasher, key, .Shallow);
+ return hasher.final();
+}
+
+fn testHashShallow(key: var) u64 {
+ // Any hash could be used here, for testing autoHash.
+ var hasher = Wyhash.init(0);
+ hash(&hasher, key, .Shallow);
+ return hasher.final();
+}
+
+fn testHashDeep(key: var) u64 {
+ // Any hash could be used here, for testing autoHash.
+ var hasher = Wyhash.init(0);
+ hash(&hasher, key, .Deep);
+ return hasher.final();
+}
+
+fn testHashDeepRecursive(key: var) u64 {
+ // Any hash could be used here, for testing autoHash.
+ var hasher = Wyhash.init(0);
+ hash(&hasher, key, .DeepRecursive);
+ return hasher.final();
+}
+
+test "hash pointer" {
+ const array = [_]u32{ 123, 123, 123 };
+ const a = &array[0];
+ const b = &array[1];
+ const c = &array[2];
+ const d = a;
+
+ testing.expect(testHashShallow(a) == testHashShallow(d));
+ testing.expect(testHashShallow(a) != testHashShallow(c));
+ testing.expect(testHashShallow(a) != testHashShallow(b));
+
+ testing.expect(testHashDeep(a) == testHashDeep(a));
+ testing.expect(testHashDeep(a) == testHashDeep(c));
+ testing.expect(testHashDeep(a) == testHashDeep(b));
+
+ testing.expect(testHashDeepRecursive(a) == testHashDeepRecursive(a));
+ testing.expect(testHashDeepRecursive(a) == testHashDeepRecursive(c));
+ testing.expect(testHashDeepRecursive(a) == testHashDeepRecursive(b));
+}
+
+test "hash slice shallow" {
+ // Allocate one array dynamically so that we're assured it is not merged
+ // with the other by the optimization passes.
+ const array1 = try std.heap.direct_allocator.create([6]u32);
+ defer std.heap.direct_allocator.destroy(array1);
+ array1.* = [_]u32{ 1, 2, 3, 4, 5, 6 };
+ const array2 = [_]u32{ 1, 2, 3, 4, 5, 6 };
+ const a = array1[0..];
+ const b = array2[0..];
+ const c = array1[0..3];
+ testing.expect(testHashShallow(a) == testHashShallow(a));
+ testing.expect(testHashShallow(a) != testHashShallow(array1));
+ testing.expect(testHashShallow(a) != testHashShallow(b));
+ testing.expect(testHashShallow(a) != testHashShallow(c));
+}
+
+test "hash slice deep" {
+ // Allocate one array dynamically so that we're assured it is not merged
+ // with the other by the optimization passes.
+ const array1 = try std.heap.direct_allocator.create([6]u32);
+ defer std.heap.direct_allocator.destroy(array1);
+ array1.* = [_]u32{ 1, 2, 3, 4, 5, 6 };
+ const array2 = [_]u32{ 1, 2, 3, 4, 5, 6 };
+ const a = array1[0..];
+ const b = array2[0..];
+ const c = array1[0..3];
+ testing.expect(testHashDeep(a) == testHashDeep(a));
+ testing.expect(testHashDeep(a) == testHashDeep(array1));
+ testing.expect(testHashDeep(a) == testHashDeep(b));
+ testing.expect(testHashDeep(a) != testHashDeep(c));
+}
+
+test "hash struct deep" {
+ const Foo = struct {
+ a: u32,
+ b: f64,
+ c: *bool,
+
+ const Self = @This();
+
+ pub fn init(allocator: *mem.Allocator, a_: u32, b_: f64, c_: bool) !Self {
+ const ptr = try allocator.create(bool);
+ ptr.* = c_;
+ return Self{ .a = a_, .b = b_, .c = ptr };
+ }
+ };
+
+ const allocator = std.heap.direct_allocator;
+ const foo = try Foo.init(allocator, 123, 1.0, true);
+ const bar = try Foo.init(allocator, 123, 1.0, true);
+ const baz = try Foo.init(allocator, 123, 1.0, false);
+ defer allocator.destroy(foo.c);
+ defer allocator.destroy(bar.c);
+ defer allocator.destroy(baz.c);
+
+ testing.expect(testHashDeep(foo) == testHashDeep(bar));
+ testing.expect(testHashDeep(foo) != testHashDeep(baz));
+ testing.expect(testHashDeep(bar) != testHashDeep(baz));
+
+ var hasher = Wyhash.init(0);
+ const h = testHashDeep(foo);
+ autoHash(&hasher, foo.a);
+ autoHash(&hasher, foo.b);
+ autoHash(&hasher, foo.c.*);
+ testing.expectEqual(h, hasher.final());
+
+ const h2 = testHashDeepRecursive(&foo);
+ testing.expect(h2 != testHashDeep(&foo));
+ testing.expect(h2 == testHashDeep(foo));
+}
+
+test "testHash optional" {
+ const a: ?u32 = 123;
+ const b: ?u32 = null;
+ testing.expectEqual(testHash(a), testHash(u32(123)));
+ testing.expect(testHash(a) != testHash(b));
+ testing.expectEqual(testHash(b), 0);
+}
+
+test "testHash array" {
+ const a = [_]u32{ 1, 2, 3 };
+ const h = testHash(a);
+ var hasher = Wyhash.init(0);
+ autoHash(&hasher, u32(1));
+ autoHash(&hasher, u32(2));
+ autoHash(&hasher, u32(3));
+ testing.expectEqual(h, hasher.final());
+}
+
+test "testHash struct" {
+ const Foo = struct {
+ a: u32 = 1,
+ b: u32 = 2,
+ c: u32 = 3,
+ };
+ const f = Foo{};
+ const h = testHash(f);
+ var hasher = Wyhash.init(0);
+ autoHash(&hasher, u32(1));
+ autoHash(&hasher, u32(2));
+ autoHash(&hasher, u32(3));
+ testing.expectEqual(h, hasher.final());
+}
+
+test "testHash union" {
+ const Foo = union(enum) {
+ A: u32,
+ B: f32,
+ C: u32,
+ };
+
+ const a = Foo{ .A = 18 };
+ var b = Foo{ .B = 12.34 };
+ const c = Foo{ .C = 18 };
+ testing.expect(testHash(a) == testHash(a));
+ testing.expect(testHash(a) != testHash(b));
+ testing.expect(testHash(a) != testHash(c));
+
+ b = Foo{ .A = 18 };
+ testing.expect(testHash(a) == testHash(b));
+}
+
+test "testHash vector" {
+ const a: @Vector(4, u32) = [_]u32{ 1, 2, 3, 4 };
+ const b: @Vector(4, u32) = [_]u32{ 1, 2, 3, 5 };
+ testing.expect(testHash(a) == testHash(a));
+ testing.expect(testHash(a) != testHash(b));
+
+ const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
+ const d: @Vector(4, u31) = [_]u31{ 1, 2, 3, 5 };
+ testing.expect(testHash(c) == testHash(c));
+ testing.expect(testHash(c) != testHash(d));
+}
+
+test "testHash error union" {
+ const Errors = error{Test};
+ const Foo = struct {
+ a: u32 = 1,
+ b: u32 = 2,
+ c: u32 = 3,
+ };
+ const f = Foo{};
+ const g: Errors!Foo = Errors.Test;
+ testing.expect(testHash(f) != testHash(g));
+ testing.expect(testHash(f) == testHash(Foo{}));
+ testing.expect(testHash(g) == testHash(Errors.Test));
+}
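
Since autoHash rejects slices outright, hashing slice contents requires stating the intent with an explicit strategy. An illustrative sketch (not part of the commit, assuming auto_hash.zig is imported directly):

    const std = @import("std");
    const auto_hash = @import("auto_hash.zig");

    test "hash slice contents via an explicit strategy (illustrative)" {
        const a: []const u8 = "some bytes";
        const b: []const u8 = "some bytes";

        // .Deep follows the slice and hashes its elements, so slices with
        // equal contents produce equal digests; this compiles where
        // autoHash would refuse with an "intent is unclear" error.
        var ha = std.hash.Wyhash.init(0);
        auto_hash.hash(&ha, a, .Deep);
        var hb = std.hash.Wyhash.init(0);
        auto_hash.hash(&hb, b, .Deep);
        std.testing.expect(ha.final() == hb.final());
    }
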
diff --git a/lib/std/hash/benchmark.zig b/lib/std/hash/benchmark.zig
new file mode 100644
index 0000000000..d110684a8e
--- /dev/null
+++ b/lib/std/hash/benchmark.zig
@@ -0,0 +1,273 @@
+// zig run benchmark.zig --release-fast --override-std-dir ..
+
+const builtin = @import("builtin");
+const std = @import("std");
+const time = std.time;
+const Timer = time.Timer;
+const hash = std.hash;
+
+const KiB = 1024;
+const MiB = 1024 * KiB;
+const GiB = 1024 * MiB;
+
+var prng = std.rand.DefaultPrng.init(0);
+
+const Hash = struct {
+ ty: type,
+ name: []const u8,
+ has_iterative_api: bool = true,
+ init_u8s: ?[]const u8 = null,
+ init_u64: ?u64 = null,
+};
+
+const siphash_key = "0123456789abcdef";
+
+const hashes = [_]Hash{
+ Hash{
+ .ty = hash.Wyhash,
+ .name = "wyhash",
+ .init_u64 = 0,
+ },
+ Hash{
+ .ty = hash.SipHash64(1, 3),
+ .name = "siphash(1,3)",
+ .init_u8s = siphash_key,
+ },
+ Hash{
+ .ty = hash.SipHash64(2, 4),
+ .name = "siphash(2,4)",
+ .init_u8s = siphash_key,
+ },
+ Hash{
+ .ty = hash.Fnv1a_64,
+ .name = "fnv1a",
+ },
+ Hash{
+ .ty = hash.Adler32,
+ .name = "adler32",
+ },
+ Hash{
+ .ty = hash.crc.Crc32WithPoly(hash.crc.Polynomial.IEEE),
+ .name = "crc32-slicing-by-8",
+ },
+ Hash{
+ .ty = hash.crc.Crc32SmallWithPoly(hash.crc.Polynomial.IEEE),
+ .name = "crc32-half-byte-lookup",
+ },
+ Hash{
+ .ty = hash.CityHash32,
+ .name = "cityhash-32",
+ .has_iterative_api = false,
+ },
+ Hash{
+ .ty = hash.CityHash64,
+ .name = "cityhash-64",
+ .has_iterative_api = false,
+ },
+ Hash{
+ .ty = hash.Murmur2_32,
+ .name = "murmur2-32",
+ .has_iterative_api = false,
+ },
+ Hash{
+ .ty = hash.Murmur2_64,
+ .name = "murmur2-64",
+ .has_iterative_api = false,
+ },
+ Hash{
+ .ty = hash.Murmur3_32,
+ .name = "murmur3-32",
+ .has_iterative_api = false,
+ },
+};
+
+const Result = struct {
+ hash: u64,
+ throughput: u64,
+};
+
+const block_size: usize = 8 * 8192;
+
+pub fn benchmarkHash(comptime H: var, bytes: usize) !Result {
+ var h = blk: {
+ if (H.init_u8s) |init| {
+ break :blk H.ty.init(init);
+ }
+ if (H.init_u64) |init| {
+ break :blk H.ty.init(init);
+ }
+ break :blk H.ty.init();
+ };
+
+ var block: [block_size]u8 = undefined;
+ prng.random.bytes(block[0..]);
+
+ var offset: usize = 0;
+ var timer = try Timer.start();
+ const start = timer.lap();
+ while (offset < bytes) : (offset += block.len) {
+ h.update(block[0..]);
+ }
+ const end = timer.read();
+
+ const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
+ const throughput = @floatToInt(u64, @intToFloat(f64, bytes) / elapsed_s);
+
+ return Result{
+ .hash = h.final(),
+ .throughput = throughput,
+ };
+}
+
+pub fn benchmarkHashSmallKeys(comptime H: var, key_size: usize, bytes: usize) !Result {
+ const key_count = bytes / key_size;
+ var block: [block_size]u8 = undefined;
+ prng.random.bytes(block[0..]);
+
+ var i: usize = 0;
+ var timer = try Timer.start();
+ const start = timer.lap();
+
+ var sum: u64 = 0;
+ while (i < key_count) : (i += 1) {
+ const small_key = block[0..key_size];
+ sum +%= blk: {
+ if (H.init_u8s) |init| {
+ break :blk H.ty.hash(init, small_key);
+ }
+ if (H.init_u64) |init| {
+ break :blk H.ty.hash(init, small_key);
+ }
+ break :blk H.ty.hash(small_key);
+ };
+ }
+ const end = timer.read();
+
+ const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
+ const throughput = @floatToInt(u64, @intToFloat(f64, bytes) / elapsed_s);
+
+ return Result{
+ .hash = sum,
+ .throughput = throughput,
+ };
+}
+
+fn usage() void {
+ std.debug.warn(
+ \\throughput_test [options]
+ \\
+ \\Options:
+ \\ --filter [test-name]
+ \\ --seed [int]
+ \\ --count [int]
+ \\ --key-size [int]
+ \\ --iterative-only
+ \\ --help
+ \\
+ );
+}
+
+fn mode(comptime x: comptime_int) comptime_int {
+ return if (builtin.mode == builtin.Mode.Debug) x / 64 else x;
+}
+
+// TODO(#1358): Replace with builtin formatted padding when available.
+fn printPad(stdout: var, s: []const u8) !void {
+ var i: usize = 0;
+ while (i < 12 - s.len) : (i += 1) {
+ try stdout.print(" ");
+ }
+ try stdout.print("{}", s);
+}
+
+pub fn main() !void {
+ var stdout_file = try std.io.getStdOut();
+ var stdout_out_stream = stdout_file.outStream();
+ const stdout = &stdout_out_stream.stream;
+
+ var buffer: [1024]u8 = undefined;
+ var fixed = std.heap.FixedBufferAllocator.init(buffer[0..]);
+ const args = try std.process.argsAlloc(&fixed.allocator);
+
+ var filter: ?[]u8 = "";
+ var count: usize = mode(128 * MiB);
+ var key_size: usize = 32;
+ var seed: u32 = 0;
+ var test_iterative_only = false;
+
+ var i: usize = 1;
+ while (i < args.len) : (i += 1) {
+ if (std.mem.eql(u8, args[i], "--mode")) {
+ try stdout.print("{}\n", builtin.mode);
+ return;
+ } else if (std.mem.eql(u8, args[i], "--seed")) {
+ i += 1;
+ if (i == args.len) {
+ usage();
+ std.os.exit(1);
+ }
+
+ seed = try std.fmt.parseUnsigned(u32, args[i], 10);
+ // we seed later
+ } else if (std.mem.eql(u8, args[i], "--filter")) {
+ i += 1;
+ if (i == args.len) {
+ usage();
+ std.os.exit(1);
+ }
+
+ filter = args[i];
+ } else if (std.mem.eql(u8, args[i], "--count")) {
+ i += 1;
+ if (i == args.len) {
+ usage();
+ std.os.exit(1);
+ }
+
+ const c = try std.fmt.parseUnsigned(usize, args[i], 10);
+ count = c * MiB;
+ } else if (std.mem.eql(u8, args[i], "--key-size")) {
+ i += 1;
+ if (i == args.len) {
+ usage();
+ std.os.exit(1);
+ }
+
+ key_size = try std.fmt.parseUnsigned(usize, args[i], 10);
+ if (key_size > block_size) {
+ try stdout.print("key_size cannot exceed block size of {}\n", block_size);
+ std.os.exit(1);
+ }
+ } else if (std.mem.eql(u8, args[i], "--iterative-only")) {
+ test_iterative_only = true;
+ } else if (std.mem.eql(u8, args[i], "--help")) {
+ usage();
+ return;
+ } else {
+ usage();
+ std.os.exit(1);
+ }
+ }
+
+ inline for (hashes) |H| {
+ if (filter == null or std.mem.indexOf(u8, H.name, filter.?) != null) {
+ if (!test_iterative_only or H.has_iterative_api) {
+ try stdout.print("{}\n", H.name);
+
+ // Always reseed prior to every call so we are hashing the same buffer contents.
+ // This allows easier comparison between different implementations.
+ if (H.has_iterative_api) {
+ prng.seed(seed);
+ const result = try benchmarkHash(H, count);
+ try stdout.print(" iterative: {:4} MiB/s [{x:0<16}]\n", result.throughput / (1 * MiB), result.hash);
+ }
+
+ if (!test_iterative_only) {
+ prng.seed(seed);
+ const result_small = try benchmarkHashSmallKeys(H, key_size, count);
+ try stdout.print(" small keys: {:4} MiB/s [{x:0<16}]\n", result_small.throughput / (1 * MiB), result_small.hash);
+ }
+ }
+ }
+ }
+}
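
Adding another hash to the run is a matter of appending an entry to the hashes table in this file; a hypothetical sketch:

    // Any type exposing init()/update()/final() plus a one-shot hash() fits;
    // the optional seed fields select which init/hash signature gets called.
    Hash{
        .ty = hash.Fnv1a_32,
        .name = "fnv1a-32",
    },
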
diff --git a/lib/std/hash/cityhash.zig b/lib/std/hash/cityhash.zig
new file mode 100644
index 0000000000..43e5b7a385
--- /dev/null
+++ b/lib/std/hash/cityhash.zig
@@ -0,0 +1,387 @@
+const std = @import("std");
+const builtin = @import("builtin");
+
+pub const CityHash32 = struct {
+ const Self = @This();
+
+ // Magic numbers for 32-bit hashing. Copied from Murmur3.
+ const c1: u32 = 0xcc9e2d51;
+ const c2: u32 = 0x1b873593;
+
+ fn fetch32(ptr: [*]const u8) u32 {
+ var v: u32 = undefined;
+ @memcpy(@ptrCast([*]u8, &v), ptr, 4);
+ if (builtin.endian == builtin.Endian.Big)
+ return @byteSwap(u32, v);
+ return v;
+ }
+
+ // A 32-bit to 32-bit integer hash copied from Murmur3.
+ fn fmix(h: u32) u32 {
+ var h1: u32 = h;
+ h1 ^= h1 >> 16;
+ h1 *%= 0x85ebca6b;
+ h1 ^= h1 >> 13;
+ h1 *%= 0xc2b2ae35;
+ h1 ^= h1 >> 16;
+ return h1;
+ }
+
+ // Rotate right helper
+ fn rotr32(x: u32, comptime r: u32) u32 {
+ return (x >> r) | (x << (32 - r));
+ }
+
+ // Helper from Murmur3 for combining two 32-bit values.
+ fn mur(a: u32, h: u32) u32 {
+ var a1: u32 = a;
+ var h1: u32 = h;
+ a1 *%= c1;
+ a1 = rotr32(a1, 17);
+ a1 *%= c2;
+ h1 ^= a1;
+ h1 = rotr32(h1, 19);
+ return h1 *% 5 +% 0xe6546b64;
+ }
+
+ fn hash32Len0To4(str: []const u8) u32 {
+ const len: u32 = @truncate(u32, str.len);
+ var b: u32 = 0;
+ var c: u32 = 9;
+ for (str) |v| {
+ b = b *% c1 +% @bitCast(u32, @intCast(i32, @bitCast(i8, v)));
+ c ^= b;
+ }
+ return fmix(mur(b, mur(len, c)));
+ }
+
+ fn hash32Len5To12(str: []const u8) u32 {
+ var a: u32 = @truncate(u32, str.len);
+ var b: u32 = a *% 5;
+ var c: u32 = 9;
+ const d: u32 = b;
+
+ a +%= fetch32(str.ptr);
+ b +%= fetch32(str.ptr + str.len - 4);
+ c +%= fetch32(str.ptr + ((str.len >> 1) & 4));
+
+ return fmix(mur(c, mur(b, mur(a, d))));
+ }
+
+ fn hash32Len13To24(str: []const u8) u32 {
+ const len: u32 = @truncate(u32, str.len);
+ const a: u32 = fetch32(str.ptr + (str.len >> 1) - 4);
+ const b: u32 = fetch32(str.ptr + 4);
+ const c: u32 = fetch32(str.ptr + str.len - 8);
+ const d: u32 = fetch32(str.ptr + (str.len >> 1));
+ const e: u32 = fetch32(str.ptr);
+ const f: u32 = fetch32(str.ptr + str.len - 4);
+
+ return fmix(mur(f, mur(e, mur(d, mur(c, mur(b, mur(a, len)))))));
+ }
+
+ pub fn hash(str: []const u8) u32 {
+ if (str.len <= 24) {
+ if (str.len <= 4) {
+ return hash32Len0To4(str);
+ } else {
+ if (str.len <= 12)
+ return hash32Len5To12(str);
+ return hash32Len13To24(str);
+ }
+ }
+
+ const len: u32 = @truncate(u32, str.len);
+ var h: u32 = len;
+ var g: u32 = c1 *% len;
+ var f: u32 = g;
+
+ const a0: u32 = rotr32(fetch32(str.ptr + str.len - 4) *% c1, 17) *% c2;
+ const a1: u32 = rotr32(fetch32(str.ptr + str.len - 8) *% c1, 17) *% c2;
+ const a2: u32 = rotr32(fetch32(str.ptr + str.len - 16) *% c1, 17) *% c2;
+ const a3: u32 = rotr32(fetch32(str.ptr + str.len - 12) *% c1, 17) *% c2;
+ const a4: u32 = rotr32(fetch32(str.ptr + str.len - 20) *% c1, 17) *% c2;
+
+ h ^= a0;
+ h = rotr32(h, 19);
+ h = h *% 5 +% 0xe6546b64;
+ h ^= a2;
+ h = rotr32(h, 19);
+ h = h *% 5 +% 0xe6546b64;
+ g ^= a1;
+ g = rotr32(g, 19);
+ g = g *% 5 +% 0xe6546b64;
+ g ^= a3;
+ g = rotr32(g, 19);
+ g = g *% 5 +% 0xe6546b64;
+ f +%= a4;
+ f = rotr32(f, 19);
+ f = f *% 5 +% 0xe6546b64;
+ var iters = (str.len - 1) / 20;
+ var ptr = str.ptr;
+ while (iters != 0) : (iters -= 1) {
+ const b0: u32 = rotr32(fetch32(ptr) *% c1, 17) *% c2;
+ const b1: u32 = fetch32(ptr + 4);
+ const b2: u32 = rotr32(fetch32(ptr + 8) *% c1, 17) *% c2;
+ const b3: u32 = rotr32(fetch32(ptr + 12) *% c1, 17) *% c2;
+ const b4: u32 = fetch32(ptr + 16);
+
+ h ^= b0;
+ h = rotr32(h, 18);
+ h = h *% 5 +% 0xe6546b64;
+ f +%= b1;
+ f = rotr32(f, 19);
+ f = f *% c1;
+ g +%= b2;
+ g = rotr32(g, 18);
+ g = g *% 5 +% 0xe6546b64;
+ h ^= b3 +% b1;
+ h = rotr32(h, 19);
+ h = h *% 5 +% 0xe6546b64;
+ g ^= b4;
+ g = @byteSwap(u32, g) *% 5;
+ h +%= b4 *% 5;
+ h = @byteSwap(u32, h);
+ f +%= b0;
+ const t: u32 = h;
+ h = f;
+ f = g;
+ g = t;
+ ptr += 20;
+ }
+ g = rotr32(g, 11) *% c1;
+ g = rotr32(g, 17) *% c1;
+ f = rotr32(f, 11) *% c1;
+ f = rotr32(f, 17) *% c1;
+ h = rotr32(h +% g, 19);
+ h = h *% 5 +% 0xe6546b64;
+ h = rotr32(h, 17) *% c1;
+ h = rotr32(h +% f, 19);
+ h = h *% 5 +% 0xe6546b64;
+ h = rotr32(h, 17) *% c1;
+ return h;
+ }
+};
+
+pub const CityHash64 = struct {
+ const Self = @This();
+
+ // Some primes between 2^63 and 2^64 for various uses.
+ const k0: u64 = 0xc3a5c85c97cb3127;
+ const k1: u64 = 0xb492b66fbe98f273;
+ const k2: u64 = 0x9ae16a3b2f90404f;
+
+ fn fetch32(ptr: [*]const u8) u32 {
+ var v: u32 = undefined;
+ @memcpy(@ptrCast([*]u8, &v), ptr, 4);
+ if (builtin.endian == builtin.Endian.Big)
+ return @byteSwap(u32, v);
+ return v;
+ }
+
+ fn fetch64(ptr: [*]const u8) u64 {
+ var v: u64 = undefined;
+ @memcpy(@ptrCast([*]u8, &v), ptr, 8);
+ if (builtin.endian == builtin.Endian.Big)
+ return @byteSwap(u64, v);
+ return v;
+ }
+
+ // Rotate right helper
+ fn rotr64(x: u64, comptime r: u64) u64 {
+ return (x >> r) | (x << (64 - r));
+ }
+
+ fn shiftmix(v: u64) u64 {
+ return v ^ (v >> 47);
+ }
+
+ fn hashLen16(u: u64, v: u64) u64 {
+ return @inlineCall(hash128To64, u, v);
+ }
+
+ fn hashLen16Mul(low: u64, high: u64, mul: u64) u64 {
+ var a: u64 = (low ^ high) *% mul;
+ a ^= (a >> 47);
+ var b: u64 = (high ^ a) *% mul;
+ b ^= (b >> 47);
+ b *%= mul;
+ return b;
+ }
+
+ fn hash128To64(low: u64, high: u64) u64 {
+ return @inlineCall(hashLen16Mul, low, high, 0x9ddfea08eb382d69);
+ }
+
+ fn hashLen0To16(str: []const u8) u64 {
+ const len: u64 = u64(str.len);
+ if (len >= 8) {
+ const mul: u64 = k2 +% len *% 2;
+ const a: u64 = fetch64(str.ptr) +% k2;
+ const b: u64 = fetch64(str.ptr + str.len - 8);
+ const c: u64 = rotr64(b, 37) *% mul +% a;
+ const d: u64 = (rotr64(a, 25) +% b) *% mul;
+ return hashLen16Mul(c, d, mul);
+ }
+ if (len >= 4) {
+ const mul: u64 = k2 +% len *% 2;
+ const a: u64 = fetch32(str.ptr);
+ return hashLen16Mul(len +% (a << 3), fetch32(str.ptr + str.len - 4), mul);
+ }
+ if (len > 0) {
+ const a: u8 = str[0];
+ const b: u8 = str[str.len >> 1];
+ const c: u8 = str[str.len - 1];
+ const y: u32 = @intCast(u32, a) +% (@intCast(u32, b) << 8);
+ const z: u32 = @truncate(u32, str.len) +% (@intCast(u32, c) << 2);
+ return shiftmix(@intCast(u64, y) *% k2 ^ @intCast(u64, z) *% k0) *% k2;
+ }
+ return k2;
+ }
+
+ fn hashLen17To32(str: []const u8) u64 {
+ const len: u64 = u64(str.len);
+ const mul: u64 = k2 +% len *% 2;
+ const a: u64 = fetch64(str.ptr) *% k1;
+ const b: u64 = fetch64(str.ptr + 8);
+ const c: u64 = fetch64(str.ptr + str.len - 8) *% mul;
+ const d: u64 = fetch64(str.ptr + str.len - 16) *% k2;
+
+ return hashLen16Mul(rotr64(a +% b, 43) +% rotr64(c, 30) +% d, a +% rotr64(b +% k2, 18) +% c, mul);
+ }
+
+ fn hashLen33To64(str: []const u8) u64 {
+ const len: u64 = u64(str.len);
+ const mul: u64 = k2 +% len *% 2;
+ const a: u64 = fetch64(str.ptr) *% k2;
+ const b: u64 = fetch64(str.ptr + 8);
+ const c: u64 = fetch64(str.ptr + str.len - 24);
+ const d: u64 = fetch64(str.ptr + str.len - 32);
+ const e: u64 = fetch64(str.ptr + 16) *% k2;
+ const f: u64 = fetch64(str.ptr + 24) *% 9;
+ const g: u64 = fetch64(str.ptr + str.len - 8);
+ const h: u64 = fetch64(str.ptr + str.len - 16) *% mul;
+
+ const u: u64 = rotr64(a +% g, 43) +% (rotr64(b, 30) +% c) *% 9;
+ const v: u64 = ((a +% g) ^ d) +% f +% 1;
+ const w: u64 = @byteSwap(u64, (u +% v) *% mul) +% h;
+ const x: u64 = rotr64(e +% f, 42) +% c;
+ const y: u64 = (@byteSwap(u64, (v +% w) *% mul) +% g) *% mul;
+ const z: u64 = e +% f +% c;
+ const a1: u64 = @byteSwap(u64, (x +% z) *% mul +% y) +% b;
+ const b1: u64 = shiftmix((z +% a1) *% mul +% d +% h) *% mul;
+ return b1 +% x;
+ }
+
+ const WeakPair = struct {
+ first: u64,
+ second: u64,
+ };
+
+ fn weakHashLen32WithSeedsHelper(w: u64, x: u64, y: u64, z: u64, a: u64, b: u64) WeakPair {
+ var a1: u64 = a;
+ var b1: u64 = b;
+ a1 +%= w;
+ b1 = rotr64(b1 +% a1 +% z, 21);
+ var c: u64 = a1;
+ a1 +%= x;
+ a1 +%= y;
+ b1 +%= rotr64(a1, 44);
+ return WeakPair{ .first = a1 +% z, .second = b1 +% c };
+ }
+
+ fn weakHashLen32WithSeeds(ptr: [*]const u8, a: u64, b: u64) WeakPair {
+ return @inlineCall(weakHashLen32WithSeedsHelper, fetch64(ptr), fetch64(ptr + 8), fetch64(ptr + 16), fetch64(ptr + 24), a, b);
+ }
+
+ pub fn hash(str: []const u8) u64 {
+ if (str.len <= 32) {
+ if (str.len <= 16) {
+ return hashLen0To16(str);
+ } else {
+ return hashLen17To32(str);
+ }
+ } else if (str.len <= 64) {
+ return hashLen33To64(str);
+ }
+
+ var len: u64 = u64(str.len);
+
+ var x: u64 = fetch64(str.ptr + str.len - 40);
+ var y: u64 = fetch64(str.ptr + str.len - 16) +% fetch64(str.ptr + str.len - 56);
+ var z: u64 = hashLen16(fetch64(str.ptr + str.len - 48) +% len, fetch64(str.ptr + str.len - 24));
+ var v: WeakPair = weakHashLen32WithSeeds(str.ptr + str.len - 64, len, z);
+ var w: WeakPair = weakHashLen32WithSeeds(str.ptr + str.len - 32, y +% k1, x);
+
+ x = x *% k1 +% fetch64(str.ptr);
+ len = (len - 1) & ~@intCast(u64, 63);
+
+ var ptr: [*]const u8 = str.ptr;
+ while (true) {
+ x = rotr64(x +% y +% v.first +% fetch64(ptr + 8), 37) *% k1;
+ y = rotr64(y +% v.second +% fetch64(ptr + 48), 42) *% k1;
+ x ^= w.second;
+ y +%= v.first +% fetch64(ptr + 40);
+ z = rotr64(z +% w.first, 33) *% k1;
+ v = weakHashLen32WithSeeds(ptr, v.second *% k1, x +% w.first);
+ w = weakHashLen32WithSeeds(ptr + 32, z +% w.second, y +% fetch64(ptr + 16));
+ const t: u64 = z;
+ z = x;
+ x = t;
+
+ ptr += 64;
+ len -= 64;
+ if (len == 0)
+ break;
+ }
+
+ return hashLen16(hashLen16(v.first, w.first) +% shiftmix(y) *% k1 +% z, hashLen16(v.second, w.second) +% x);
+ }
+
+ pub fn hashWithSeed(str: []const u8, seed: u64) u64 {
+ return @inlineCall(Self.hashWithSeeds, str, k2, seed);
+ }
+
+ pub fn hashWithSeeds(str: []const u8, seed0: u64, seed1: u64) u64 {
+ return hashLen16(hash(str) -% seed0, seed1);
+ }
+};
+
+fn SMHasherTest(comptime hash_fn: var, comptime hashbits: u32) u32 {
+ const hashbytes = hashbits / 8;
+ var key: [256]u8 = undefined;
+ var hashes: [hashbytes * 256]u8 = undefined;
+ var final: [hashbytes]u8 = undefined;
+
+ @memset(@ptrCast([*]u8, &key[0]), 0, @sizeOf(@typeOf(key)));
+ @memset(@ptrCast([*]u8, &hashes[0]), 0, @sizeOf(@typeOf(hashes)));
+ @memset(@ptrCast([*]u8, &final[0]), 0, @sizeOf(@typeOf(final)));
+
+ var i: u32 = 0;
+ while (i < 256) : (i += 1) {
+ key[i] = @intCast(u8, i);
+
+ var h = hash_fn(key[0..i], 256 - i);
+ if (builtin.endian == builtin.Endian.Big)
+ h = @byteSwap(@typeOf(h), h);
+ @memcpy(@ptrCast([*]u8, &hashes[i * hashbytes]), @ptrCast([*]u8, &h), hashbytes);
+ }
+
+ return @truncate(u32, hash_fn(hashes, 0));
+}
+
+fn CityHash32hashIgnoreSeed(str: []const u8, seed: u32) u32 {
+ return CityHash32.hash(str);
+}
+
+test "cityhash32" {
+ // Note: SMHasher doesn't provide a 32-bit version of the algorithm.
+ // Note: The implementation was verified against the Google Abseil version.
+ std.testing.expectEqual(SMHasherTest(CityHash32hashIgnoreSeed, 32), 0x68254F81);
+}
+
+test "cityhash64" {
+ // Note: This is not compliant with the SMHasher implementation of CityHash64!
+ // Note: The implementation was verified against the Google Abseil version.
+ std.testing.expectEqual(SMHasherTest(CityHash64.hashWithSeed, 64), 0x5FABC5C5);
+}
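
CityHash keeps no streaming state (has_iterative_api = false in the benchmark table), so usage is one-shot only. An illustrative sketch (not part of the commit):

    const std = @import("std");

    test "cityhash one-shot usage (illustrative)" {
        const h64 = std.hash.CityHash64.hash("some key");
        const seeded = std.hash.CityHash64.hashWithSeed("some key", 1234);
        // Deterministic for the same input; a seed derives a different value
        // (a collision here is possible in principle but astronomically unlikely).
        std.testing.expect(h64 == std.hash.CityHash64.hash("some key"));
        std.testing.expect(seeded != h64);
    }
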
diff --git a/lib/std/hash/crc.zig b/lib/std/hash/crc.zig
new file mode 100644
index 0000000000..cdcaf55610
--- /dev/null
+++ b/lib/std/hash/crc.zig
@@ -0,0 +1,180 @@
+// This file provides two CRC32 implementations with the following key characteristics:
+//
+// - Crc32WithPoly uses 8KiB of lookup tables but is ~10x faster than the small method.
+//
+// - Crc32SmallWithPoly uses only 64 bytes of memory but is slower. It is still
+// moderately fast, just slow relative to the slicing-by-8 approach.
+
+const std = @import("../std.zig");
+const debug = std.debug;
+const testing = std.testing;
+
+pub const Polynomial = struct {
+ pub const IEEE = 0xedb88320;
+ pub const Castagnoli = 0x82f63b78;
+ pub const Koopman = 0xeb31d82e;
+};
+
+// IEEE is by far the most common CRC and so is aliased by default.
+pub const Crc32 = Crc32WithPoly(Polynomial.IEEE);
+
+// slicing-by-8 crc32 implementation.
+pub fn Crc32WithPoly(comptime poly: u32) type {
+ return struct {
+ const Self = @This();
+ const lookup_tables = comptime block: {
+ @setEvalBranchQuota(20000);
+ var tables: [8][256]u32 = undefined;
+
+ for (tables[0]) |*e, i| {
+ var crc = @intCast(u32, i);
+ var j: usize = 0;
+ while (j < 8) : (j += 1) {
+ if (crc & 1 == 1) {
+ crc = (crc >> 1) ^ poly;
+ } else {
+ crc = (crc >> 1);
+ }
+ }
+ e.* = crc;
+ }
+
+ var i: usize = 0;
+ while (i < 256) : (i += 1) {
+ var crc = tables[0][i];
+ var j: usize = 1;
+ while (j < 8) : (j += 1) {
+ const index = @truncate(u8, crc);
+ crc = tables[0][index] ^ (crc >> 8);
+ tables[j][i] = crc;
+ }
+ }
+
+ break :block tables;
+ };
+
+ crc: u32,
+
+ pub fn init() Self {
+ return Self{ .crc = 0xffffffff };
+ }
+
+ pub fn update(self: *Self, input: []const u8) void {
+ var i: usize = 0;
+ while (i + 8 <= input.len) : (i += 8) {
+ const p = input[i .. i + 8];
+
+ // Unrolling this way gives a ~50 MiB/s throughput increase
+ self.crc ^= (u32(p[0]) << 0);
+ self.crc ^= (u32(p[1]) << 8);
+ self.crc ^= (u32(p[2]) << 16);
+ self.crc ^= (u32(p[3]) << 24);
+
+ self.crc =
+ lookup_tables[0][p[7]] ^
+ lookup_tables[1][p[6]] ^
+ lookup_tables[2][p[5]] ^
+ lookup_tables[3][p[4]] ^
+ lookup_tables[4][@truncate(u8, self.crc >> 24)] ^
+ lookup_tables[5][@truncate(u8, self.crc >> 16)] ^
+ lookup_tables[6][@truncate(u8, self.crc >> 8)] ^
+ lookup_tables[7][@truncate(u8, self.crc >> 0)];
+ }
+
+ while (i < input.len) : (i += 1) {
+ const index = @truncate(u8, self.crc) ^ input[i];
+ self.crc = (self.crc >> 8) ^ lookup_tables[0][index];
+ }
+ }
+
+ pub fn final(self: *Self) u32 {
+ return ~self.crc;
+ }
+
+ pub fn hash(input: []const u8) u32 {
+ var c = Self.init();
+ c.update(input);
+ return c.final();
+ }
+ };
+}
+
+test "crc32 ieee" {
+ const Crc32Ieee = Crc32WithPoly(Polynomial.IEEE);
+
+ testing.expect(Crc32Ieee.hash("") == 0x00000000);
+ testing.expect(Crc32Ieee.hash("a") == 0xe8b7be43);
+ testing.expect(Crc32Ieee.hash("abc") == 0x352441c2);
+}
+
+test "crc32 castagnoli" {
+ const Crc32Castagnoli = Crc32WithPoly(Polynomial.Castagnoli);
+
+ testing.expect(Crc32Castagnoli.hash("") == 0x00000000);
+ testing.expect(Crc32Castagnoli.hash("a") == 0xc1d04330);
+ testing.expect(Crc32Castagnoli.hash("abc") == 0x364b3fb7);
+}
+
+// half-byte lookup table implementation.
+pub fn Crc32SmallWithPoly(comptime poly: u32) type {
+ return struct {
+ const Self = @This();
+ const lookup_table = comptime block: {
+ var table: [16]u32 = undefined;
+
+ for (table) |*e, i| {
+ var crc = @intCast(u32, i * 16);
+ var j: usize = 0;
+ while (j < 8) : (j += 1) {
+ if (crc & 1 == 1) {
+ crc = (crc >> 1) ^ poly;
+ } else {
+ crc = (crc >> 1);
+ }
+ }
+ e.* = crc;
+ }
+
+ break :block table;
+ };
+
+ crc: u32,
+
+ pub fn init() Self {
+ return Self{ .crc = 0xffffffff };
+ }
+
+ pub fn update(self: *Self, input: []const u8) void {
+ for (input) |b| {
+ self.crc = lookup_table[@truncate(u4, self.crc ^ (b >> 0))] ^ (self.crc >> 4);
+ self.crc = lookup_table[@truncate(u4, self.crc ^ (b >> 4))] ^ (self.crc >> 4);
+ }
+ }
+
+ pub fn final(self: *Self) u32 {
+ return ~self.crc;
+ }
+
+ pub fn hash(input: []const u8) u32 {
+ var c = Self.init();
+ c.update(input);
+ return c.final();
+ }
+ };
+}
+
+test "small crc32 ieee" {
+ const Crc32Ieee = Crc32SmallWithPoly(Polynomial.IEEE);
+
+ testing.expect(Crc32Ieee.hash("") == 0x00000000);
+ testing.expect(Crc32Ieee.hash("a") == 0xe8b7be43);
+ testing.expect(Crc32Ieee.hash("abc") == 0x352441c2);
+}
+
+test "small crc32 castagnoli" {
+ const Crc32Castagnoli = Crc32SmallWithPoly(Polynomial.Castagnoli);
+
+ testing.expect(Crc32Castagnoli.hash("") == 0x00000000);
+ testing.expect(Crc32Castagnoli.hash("a") == 0xc1d04330);
+ testing.expect(Crc32Castagnoli.hash("abc") == 0x364b3fb7);
+}
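
Both generated types share the same streaming interface, so trading the 8KiB tables for the 64-byte variant is a one-line change. An illustrative sketch (not part of the commit):

    const std = @import("std");
    const crc = std.hash.crc;

    test "crc32 table-size trade-off (illustrative)" {
        const Fast = crc.Crc32WithPoly(crc.Polynomial.IEEE); // 8KiB of tables
        const Small = crc.Crc32SmallWithPoly(crc.Polynomial.IEEE); // 64 bytes

        var f = Fast.init();
        f.update("hello ");
        f.update("world");

        var s = Small.init();
        s.update("hello world");

        // Same polynomial, same checksum, different speed/memory trade-off.
        std.testing.expect(f.final() == s.final());
    }
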
diff --git a/lib/std/hash/fnv.zig b/lib/std/hash/fnv.zig
new file mode 100644
index 0000000000..8094134e19
--- /dev/null
+++ b/lib/std/hash/fnv.zig
@@ -0,0 +1,58 @@
+// FNV1a - Fowler-Noll-Vo hash function
+//
+// FNV1a is a fast, non-cryptographic hash function with fairly good distribution properties.
+//
+// https://tools.ietf.org/html/draft-eastlake-fnv-14
+
+const std = @import("../std.zig");
+const testing = std.testing;
+
+pub const Fnv1a_32 = Fnv1a(u32, 0x01000193, 0x811c9dc5);
+pub const Fnv1a_64 = Fnv1a(u64, 0x100000001b3, 0xcbf29ce484222325);
+pub const Fnv1a_128 = Fnv1a(u128, 0x1000000000000000000013b, 0x6c62272e07bb014262b821756295c58d);
+
+fn Fnv1a(comptime T: type, comptime prime: T, comptime offset: T) type {
+ return struct {
+ const Self = @This();
+
+ value: T,
+
+ pub fn init() Self {
+ return Self{ .value = offset };
+ }
+
+ pub fn update(self: *Self, input: []const u8) void {
+ for (input) |b| {
+ self.value ^= b;
+ self.value *%= prime;
+ }
+ }
+
+ pub fn final(self: *Self) T {
+ return self.value;
+ }
+
+ pub fn hash(input: []const u8) T {
+ var c = Self.init();
+ c.update(input);
+ return c.final();
+ }
+ };
+}
+
+test "fnv1a-32" {
+ testing.expect(Fnv1a_32.hash("") == 0x811c9dc5);
+ testing.expect(Fnv1a_32.hash("a") == 0xe40c292c);
+ testing.expect(Fnv1a_32.hash("foobar") == 0xbf9cf968);
+}
+
+test "fnv1a-64" {
+ testing.expect(Fnv1a_64.hash("") == 0xcbf29ce484222325);
+ testing.expect(Fnv1a_64.hash("a") == 0xaf63dc4c8601ec8c);
+ testing.expect(Fnv1a_64.hash("foobar") == 0x85944171f73967e8);
+}
+
+test "fnv1a-128" {
+ testing.expect(Fnv1a_128.hash("") == 0x6c62272e07bb014262b821756295c58d);
+ testing.expect(Fnv1a_128.hash("a") == 0xd228cb696f1a8caf78912b704e4a8964);
+}
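
Because the state is a single integer and the update loop is branch-free, FNV-1a also evaluates at compile time. An illustrative sketch (not part of the commit):

    const std = @import("std");

    test "fnv1a folded at comptime (illustrative)" {
        // The compiler can fold the whole hash into a constant.
        const h = comptime std.hash.Fnv1a_32.hash("foobar");
        std.testing.expect(h == 0xbf9cf968); // matches the runtime test above
    }
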
diff --git a/lib/std/hash/murmur.zig b/lib/std/hash/murmur.zig
new file mode 100644
index 0000000000..a0c8f91338
--- /dev/null
+++ b/lib/std/hash/murmur.zig
@@ -0,0 +1,345 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const testing = std.testing;
+
+const default_seed: u32 = 0xc70f6907;
+
+pub const Murmur2_32 = struct {
+ const Self = @This();
+
+ pub fn hash(str: []const u8) u32 {
+ return @inlineCall(Self.hashWithSeed, str, default_seed);
+ }
+
+ pub fn hashWithSeed(str: []const u8, seed: u32) u32 {
+ const m: u32 = 0x5bd1e995;
+ const len = @truncate(u32, str.len);
+ var h1: u32 = seed ^ len;
+ for (@ptrCast([*]allowzero align(1) const u32, str.ptr)[0..(len >> 2)]) |v| {
+ var k1: u32 = v;
+ if (builtin.endian == builtin.Endian.Big)
+ k1 = @byteSwap(u32, k1);
+ k1 *%= m;
+ k1 ^= k1 >> 24;
+ k1 *%= m;
+ h1 *%= m;
+ h1 ^= k1;
+ }
+ const offset = len & 0xfffffffc;
+ const rest = len & 3;
+ if (rest >= 3) {
+ h1 ^= @intCast(u32, str[offset + 2]) << 16;
+ }
+ if (rest >= 2) {
+ h1 ^= @intCast(u32, str[offset + 1]) << 8;
+ }
+ if (rest >= 1) {
+ h1 ^= @intCast(u32, str[offset + 0]);
+ h1 *%= m;
+ }
+ h1 ^= h1 >> 13;
+ h1 *%= m;
+ h1 ^= h1 >> 15;
+ return h1;
+ }
+
+ pub fn hashUint32(v: u32) u32 {
+ return @inlineCall(Self.hashUint32WithSeed, v, default_seed);
+ }
+
+ pub fn hashUint32WithSeed(v: u32, seed: u32) u32 {
+ const m: u32 = 0x5bd1e995;
+ const len: u32 = 4;
+ var h1: u32 = seed ^ len;
+ var k1: u32 = undefined;
+ k1 = v *% m;
+ k1 ^= k1 >> 24;
+ k1 *%= m;
+ h1 *%= m;
+ h1 ^= k1;
+ h1 ^= h1 >> 13;
+ h1 *%= m;
+ h1 ^= h1 >> 15;
+ return h1;
+ }
+
+ pub fn hashUint64(v: u64) u32 {
+ return @inlineCall(Self.hashUint64WithSeed, v, default_seed);
+ }
+
+ pub fn hashUint64WithSeed(v: u64, seed: u32) u32 {
+ const m: u32 = 0x5bd1e995;
+ const len: u32 = 8;
+ var h1: u32 = seed ^ len;
+ var k1: u32 = undefined;
+ k1 = @truncate(u32, v) *% m;
+ k1 ^= k1 >> 24;
+ k1 *%= m;
+ h1 *%= m;
+ h1 ^= k1;
+ k1 = @truncate(u32, v >> 32) *% m;
+ k1 ^= k1 >> 24;
+ k1 *%= m;
+ h1 *%= m;
+ h1 ^= k1;
+ h1 ^= h1 >> 13;
+ h1 *%= m;
+ h1 ^= h1 >> 15;
+ return h1;
+ }
+};
+
+pub const Murmur2_64 = struct {
+ const Self = @This();
+
+ pub fn hash(str: []const u8) u64 {
+ return @inlineCall(Self.hashWithSeed, str, default_seed);
+ }
+
+ pub fn hashWithSeed(str: []const u8, seed: u64) u64 {
+ const m: u64 = 0xc6a4a7935bd1e995;
+ const len = u64(str.len);
+ var h1: u64 = seed ^ (len *% m);
+ for (@ptrCast([*]allowzero align(1) const u64, str.ptr)[0..@intCast(usize, len >> 3)]) |v| {
+ var k1: u64 = v;
+ if (builtin.endian == builtin.Endian.Big)
+ k1 = @byteSwap(u64, k1);
+ k1 *%= m;
+ k1 ^= k1 >> 47;
+ k1 *%= m;
+ h1 ^= k1;
+ h1 *%= m;
+ }
+ const rest = len & 7;
+ const offset = len - rest;
+ if (rest > 0) {
+ var k1: u64 = 0;
+ @memcpy(@ptrCast([*]u8, &k1), @ptrCast([*]const u8, &str[@intCast(usize, offset)]), @intCast(usize, rest));
+ if (builtin.endian == builtin.Endian.Big)
+ k1 = @byteSwap(u64, k1);
+ h1 ^= k1;
+ h1 *%= m;
+ }
+ h1 ^= h1 >> 47;
+ h1 *%= m;
+ h1 ^= h1 >> 47;
+ return h1;
+ }
+
+ pub fn hashUint32(v: u32) u64 {
+ return @inlineCall(Self.hashUint32WithSeed, v, default_seed);
+ }
+
+ pub fn hashUint32WithSeed(v: u32, seed: u32) u64 {
+ const m: u64 = 0xc6a4a7935bd1e995;
+ const len: u64 = 4;
+ var h1: u64 = seed ^ (len *% m);
+ var k1: u64 = v;
+ h1 ^= k1;
+ h1 *%= m;
+ h1 ^= h1 >> 47;
+ h1 *%= m;
+ h1 ^= h1 >> 47;
+ return h1;
+ }
+
+ pub fn hashUint64(v: u64) u64 {
+ return @inlineCall(Self.hashUint64WithSeed, v, default_seed);
+ }
+
+ pub fn hashUint64WithSeed(v: u64, seed: u32) u64 {
+ const m: u64 = 0xc6a4a7935bd1e995;
+ const len: u64 = 8;
+ var h1: u64 = seed ^ (len *% m);
+ var k1: u64 = undefined;
+ k1 = v *% m;
+ k1 ^= k1 >> 47;
+ k1 *%= m;
+ h1 ^= k1;
+ h1 *%= m;
+ h1 ^= h1 >> 47;
+ h1 *%= m;
+ h1 ^= h1 >> 47;
+ return h1;
+ }
+};
+
+pub const Murmur3_32 = struct {
+ const Self = @This();
+
+ fn rotl32(x: u32, comptime r: u32) u32 {
+ return (x << r) | (x >> (32 - r));
+ }
+
+ pub fn hash(str: []const u8) u32 {
+ return @inlineCall(Self.hashWithSeed, str, default_seed);
+ }
+
+ pub fn hashWithSeed(str: []const u8, seed: u32) u32 {
+ const c1: u32 = 0xcc9e2d51;
+ const c2: u32 = 0x1b873593;
+ const len = @truncate(u32, str.len);
+ var h1: u32 = seed;
+ for (@ptrCast([*]allowzero align(1) const u32, str.ptr)[0..(len >> 2)]) |v| {
+ var k1: u32 = v;
+ if (builtin.endian == builtin.Endian.Big)
+ k1 = @byteSwap(u32, k1);
+ k1 *%= c1;
+ k1 = rotl32(k1, 15);
+ k1 *%= c2;
+ h1 ^= k1;
+ h1 = rotl32(h1, 13);
+ h1 *%= 5;
+ h1 +%= 0xe6546b64;
+ }
+ {
+ var k1: u32 = 0;
+ const offset = len & 0xfffffffc;
+ const rest = len & 3;
+ if (rest == 3) {
+ k1 ^= @intCast(u32, str[offset + 2]) << 16;
+ }
+ if (rest >= 2) {
+ k1 ^= @intCast(u32, str[offset + 1]) << 8;
+ }
+ if (rest >= 1) {
+ k1 ^= @intCast(u32, str[offset + 0]);
+ k1 *%= c1;
+ k1 = rotl32(k1, 15);
+ k1 *%= c2;
+ h1 ^= k1;
+ }
+ }
+ h1 ^= len;
+ h1 ^= h1 >> 16;
+ h1 *%= 0x85ebca6b;
+ h1 ^= h1 >> 13;
+ h1 *%= 0xc2b2ae35;
+ h1 ^= h1 >> 16;
+ return h1;
+ }
+
+ pub fn hashUint32(v: u32) u32 {
+ return @inlineCall(Self.hashUint32WithSeed, v, default_seed);
+ }
+
+ pub fn hashUint32WithSeed(v: u32, seed: u32) u32 {
+ const c1: u32 = 0xcc9e2d51;
+ const c2: u32 = 0x1b873593;
+ const len: u32 = 4;
+ var h1: u32 = seed;
+ var k1: u32 = undefined;
+ k1 = v *% c1;
+ k1 = rotl32(k1, 15);
+ k1 *%= c2;
+ h1 ^= k1;
+ h1 = rotl32(h1, 13);
+ h1 *%= 5;
+ h1 +%= 0xe6546b64;
+ h1 ^= len;
+ h1 ^= h1 >> 16;
+ h1 *%= 0x85ebca6b;
+ h1 ^= h1 >> 13;
+ h1 *%= 0xc2b2ae35;
+ h1 ^= h1 >> 16;
+ return h1;
+ }
+
+ pub fn hashUint64(v: u64) u32 {
+ return @inlineCall(Self.hashUint64WithSeed, v, default_seed);
+ }
+
+ pub fn hashUint64WithSeed(v: u64, seed: u32) u32 {
+ const c1: u32 = 0xcc9e2d51;
+ const c2: u32 = 0x1b873593;
+ const len: u32 = 8;
+ var h1: u32 = seed;
+ var k1: u32 = undefined;
+ k1 = @truncate(u32, v) *% c1;
+ k1 = rotl32(k1, 15);
+ k1 *%= c2;
+ h1 ^= k1;
+ h1 = rotl32(h1, 13);
+ h1 *%= 5;
+ h1 +%= 0xe6546b64;
+ k1 = @truncate(u32, v >> 32) *% c1;
+ k1 = rotl32(k1, 15);
+ k1 *%= c2;
+ h1 ^= k1;
+ h1 = rotl32(h1, 13);
+ h1 *%= 5;
+ h1 +%= 0xe6546b64;
+ h1 ^= len;
+ h1 ^= h1 >> 16;
+ h1 *%= 0x85ebca6b;
+ h1 ^= h1 >> 13;
+ h1 *%= 0xc2b2ae35;
+ h1 ^= h1 >> 16;
+ return h1;
+ }
+};
+
+fn SMHasherTest(comptime hash_fn: var, comptime hashbits: u32) u32 {
+ const hashbytes = hashbits / 8;
+ var key: [256]u8 = undefined;
+ var hashes: [hashbytes * 256]u8 = undefined;
+ var final: [hashbytes]u8 = undefined;
+
+ @memset(@ptrCast([*]u8, &key[0]), 0, @sizeOf(@typeOf(key)));
+ @memset(@ptrCast([*]u8, &hashes[0]), 0, @sizeOf(@typeOf(hashes)));
+ @memset(@ptrCast([*]u8, &final[0]), 0, @sizeOf(@typeOf(final)));
+
+ var i: u32 = 0;
+ while (i < 256) : (i += 1) {
+ key[i] = @truncate(u8, i);
+
+ var h = hash_fn(key[0..i], 256 - i);
+ if (builtin.endian == builtin.Endian.Big)
+ h = @byteSwap(@typeOf(h), h);
+ @memcpy(@ptrCast([*]u8, &hashes[i * hashbytes]), @ptrCast([*]u8, &h), hashbytes);
+ }
+
+ return @truncate(u32, hash_fn(hashes, 0));
+}
+
+test "murmur2_32" {
+ testing.expectEqual(SMHasherTest(Murmur2_32.hashWithSeed, 32), 0x27864C1E);
+ var v0: u32 = 0x12345678;
+ var v1: u64 = 0x1234567812345678;
+ var v0le: u32 = v0;
+ var v1le: u64 = v1;
+ if (builtin.endian == builtin.Endian.Big) {
+ v0le = @byteSwap(u32, v0le);
+ v1le = @byteSwap(u64, v1le);
+ }
+ testing.expectEqual(Murmur2_32.hash(@ptrCast([*]u8, &v0le)[0..4]), Murmur2_32.hashUint32(v0));
+ testing.expectEqual(Murmur2_32.hash(@ptrCast([*]u8, &v1le)[0..8]), Murmur2_32.hashUint64(v1));
+}
+
+test "murmur2_64" {
+ std.testing.expectEqual(SMHasherTest(Murmur2_64.hashWithSeed, 64), 0x1F0D3804);
+ var v0: u32 = 0x12345678;
+ var v1: u64 = 0x1234567812345678;
+ var v0le: u32 = v0;
+ var v1le: u64 = v1;
+ if (builtin.endian == builtin.Endian.Big) {
+ v0le = @byteSwap(u32, v0le);
+ v1le = @byteSwap(u64, v1le);
+ }
+ testing.expectEqual(Murmur2_64.hash(@ptrCast([*]u8, &v0le)[0..4]), Murmur2_64.hashUint32(v0));
+ testing.expectEqual(Murmur2_64.hash(@ptrCast([*]u8, &v1le)[0..8]), Murmur2_64.hashUint64(v1));
+}
+
+test "murmur3_32" {
+ std.testing.expectEqual(SMHasherTest(Murmur3_32.hashWithSeed, 32), 0xB0F57EE3);
+ var v0: u32 = 0x12345678;
+ var v1: u64 = 0x1234567812345678;
+ var v0le: u32 = v0;
+ var v1le: u64 = v1;
+ if (builtin.endian == builtin.Endian.Big) {
+ v0le = @byteSwap(u32, v0le);
+ v1le = @byteSwap(u64, v1le);
+ }
+ testing.expectEqual(Murmur3_32.hash(@ptrCast([*]u8, &v0le)[0..4]), Murmur3_32.hashUint32(v0));
+ testing.expectEqual(Murmur3_32.hash(@ptrCast([*]u8, &v1le)[0..8]), Murmur3_32.hashUint64(v1));
+}
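
The integer helpers avoid staging the key in a byte buffer; as the tests above check, they agree with hashing the little-endian bytes of the integer. An illustrative sketch (not part of the commit):

    const std = @import("std");
    const builtin = @import("builtin");

    test "murmur integer helpers (illustrative)" {
        const key: u64 = 0x1234567812345678;
        const direct = std.hash.Murmur2_64.hashUint64(key);
        // On a little-endian target this equals hashing the raw bytes.
        if (builtin.endian == builtin.Endian.Little) {
            std.testing.expect(direct == std.hash.Murmur2_64.hash(std.mem.asBytes(&key)));
        }
    }
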
diff --git a/lib/std/hash/siphash.zig b/lib/std/hash/siphash.zig
new file mode 100644
index 0000000000..3d67ba685b
--- /dev/null
+++ b/lib/std/hash/siphash.zig
@@ -0,0 +1,383 @@
+// SipHash
+//
+// SipHash is a moderately fast, non-cryptographic keyed hash function designed for resistance
+// against hash flooding DoS attacks.
+//
+// https://131002.net/siphash/
+
+const std = @import("../std.zig");
+const assert = std.debug.assert;
+const testing = std.testing;
+const math = std.math;
+const mem = std.mem;
+
+const Endian = @import("builtin").Endian;
+
+pub fn SipHash64(comptime c_rounds: usize, comptime d_rounds: usize) type {
+ return SipHash(u64, c_rounds, d_rounds);
+}
+
+pub fn SipHash128(comptime c_rounds: usize, comptime d_rounds: usize) type {
+ return SipHash(u128, c_rounds, d_rounds);
+}
+
+fn SipHashStateless(comptime T: type, comptime c_rounds: usize, comptime d_rounds: usize) type {
+ assert(T == u64 or T == u128);
+ assert(c_rounds > 0 and d_rounds > 0);
+
+ return struct {
+ const Self = @This();
+ const digest_size = 64;
+ const block_size = 64;
+
+ v0: u64,
+ v1: u64,
+ v2: u64,
+ v3: u64,
+ msg_len: u8,
+
+ pub fn init(key: []const u8) Self {
+ assert(key.len >= 16);
+
+ const k0 = mem.readIntSliceLittle(u64, key[0..8]);
+ const k1 = mem.readIntSliceLittle(u64, key[8..16]);
+
+ var d = Self{
+ .v0 = k0 ^ 0x736f6d6570736575,
+ .v1 = k1 ^ 0x646f72616e646f6d,
+ .v2 = k0 ^ 0x6c7967656e657261,
+ .v3 = k1 ^ 0x7465646279746573,
+ .msg_len = 0,
+ };
+
+ if (T == u128) {
+ d.v1 ^= 0xee;
+ }
+
+ return d;
+ }
+
+ pub fn update(self: *Self, b: []const u8) void {
+ std.debug.assert(b.len % 8 == 0);
+
+ var off: usize = 0;
+ while (off < b.len) : (off += 8) {
+ @inlineCall(self.round, b[off .. off + 8]);
+ }
+
+ self.msg_len +%= @truncate(u8, b.len);
+ }
+
+ pub fn final(self: *Self, b: []const u8) T {
+ std.debug.assert(b.len < 8);
+
+ self.msg_len +%= @truncate(u8, b.len);
+
+ var buf = [_]u8{0} ** 8;
+ mem.copy(u8, buf[0..], b[0..]);
+ buf[7] = self.msg_len;
+ self.round(buf[0..]);
+
+ if (T == u128) {
+ self.v2 ^= 0xee;
+ } else {
+ self.v2 ^= 0xff;
+ }
+
+ comptime var i: usize = 0;
+ inline while (i < d_rounds) : (i += 1) {
+ @inlineCall(sipRound, self);
+ }
+
+ const b1 = self.v0 ^ self.v1 ^ self.v2 ^ self.v3;
+ if (T == u64) {
+ return b1;
+ }
+
+ self.v1 ^= 0xdd;
+
+ comptime var j: usize = 0;
+ inline while (j < d_rounds) : (j += 1) {
+ @inlineCall(sipRound, self);
+ }
+
+ const b2 = self.v0 ^ self.v1 ^ self.v2 ^ self.v3;
+ return (u128(b2) << 64) | b1;
+ }
+
+ fn round(self: *Self, b: []const u8) void {
+ assert(b.len == 8);
+
+ const m = mem.readIntSliceLittle(u64, b[0..]);
+ self.v3 ^= m;
+
+ comptime var i: usize = 0;
+ inline while (i < c_rounds) : (i += 1) {
+ @inlineCall(sipRound, self);
+ }
+
+ self.v0 ^= m;
+ }
+
+ fn sipRound(d: *Self) void {
+ d.v0 +%= d.v1;
+ d.v1 = math.rotl(u64, d.v1, u64(13));
+ d.v1 ^= d.v0;
+ d.v0 = math.rotl(u64, d.v0, u64(32));
+ d.v2 +%= d.v3;
+ d.v3 = math.rotl(u64, d.v3, u64(16));
+ d.v3 ^= d.v2;
+ d.v0 +%= d.v3;
+ d.v3 = math.rotl(u64, d.v3, u64(21));
+ d.v3 ^= d.v0;
+ d.v2 +%= d.v1;
+ d.v1 = math.rotl(u64, d.v1, u64(17));
+ d.v1 ^= d.v2;
+ d.v2 = math.rotl(u64, d.v2, u64(32));
+ }
+
+ pub fn hash(key: []const u8, input: []const u8) T {
+ const aligned_len = input.len - (input.len % 8);
+
+ var c = Self.init(key);
+ @inlineCall(c.update, input[0..aligned_len]);
+ return @inlineCall(c.final, input[aligned_len..]);
+ }
+ };
+}
+
+pub fn SipHash(comptime T: type, comptime c_rounds: usize, comptime d_rounds: usize) type {
+ assert(T == u64 or T == u128);
+ assert(c_rounds > 0 and d_rounds > 0);
+
+ return struct {
+ const State = SipHashStateless(T, c_rounds, d_rounds);
+ const Self = @This();
+ const digest_size = 64;
+ const block_size = 64;
+
+ state: State,
+ buf: [8]u8,
+ buf_len: usize,
+
+ pub fn init(key: []const u8) Self {
+ return Self{
+ .state = State.init(key),
+ .buf = undefined,
+ .buf_len = 0,
+ };
+ }
+
+ pub fn update(self: *Self, b: []const u8) void {
+ var off: usize = 0;
+
+ if (self.buf_len != 0 and self.buf_len + b.len >= 8) {
+ off += 8 - self.buf_len;
+ mem.copy(u8, self.buf[self.buf_len..], b[0..off]);
+ self.state.update(self.buf[0..]);
+ self.buf_len = 0;
+ }
+
+ const remain_len = b.len - off;
+ const aligned_len = remain_len - (remain_len % 8);
+ self.state.update(b[off .. off + aligned_len]);
+
+ mem.copy(u8, self.buf[self.buf_len..], b[off + aligned_len ..]);
+ self.buf_len += @intCast(u8, b[off + aligned_len ..].len);
+ }
+
+ pub fn final(self: *Self) T {
+ return self.state.final(self.buf[0..self.buf_len]);
+ }
+
+ pub fn hash(key: []const u8, input: []const u8) T {
+ return State.hash(key, input);
+ }
+ };
+}
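
An illustrative sketch (not part of the commit): the wrapper's 8-byte buffering makes the streaming API agree with the one-shot helper even when updates are not block-aligned, and the 16-byte key is what provides flood resistance (in practice it would come from a CSPRNG at startup).

    const std = @import("std");

    test "siphash keyed streaming (illustrative)" {
        const Hasher = std.hash.SipHash64(2, 4);
        const key = "0123456789abcdef"; // 16 bytes, assumed secret

        var h = Hasher.init(key);
        h.update("fragmented "); // 11 bytes: one block hashed, 3 buffered
        h.update("input"); // completes the buffered block
        std.testing.expect(h.final() == Hasher.hash(key, "fragmented input"));
    }
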
+
+// Test vectors from reference implementation.
+// https://github.com/veorq/SipHash/blob/master/vectors.h
+const test_key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f";
+
+test "siphash64-2-4 sanity" {
+ const vectors = [_][8]u8{
+ "\x31\x0e\x0e\xdd\x47\xdb\x6f\x72", // ""
+ "\xfd\x67\xdc\x93\xc5\x39\xf8\x74", // "\x00"
+ "\x5a\x4f\xa9\xd9\x09\x80\x6c\x0d", // "\x00\x01" ... etc
+ "\x2d\x7e\xfb\xd7\x96\x66\x67\x85",
+ "\xb7\x87\x71\x27\xe0\x94\x27\xcf",
+ "\x8d\xa6\x99\xcd\x64\x55\x76\x18",
+ "\xce\xe3\xfe\x58\x6e\x46\xc9\xcb",
+ "\x37\xd1\x01\x8b\xf5\x00\x02\xab",
+ "\x62\x24\x93\x9a\x79\xf5\xf5\x93",
+ "\xb0\xe4\xa9\x0b\xdf\x82\x00\x9e",
+ "\xf3\xb9\xdd\x94\xc5\xbb\x5d\x7a",
+ "\xa7\xad\x6b\x22\x46\x2f\xb3\xf4",
+ "\xfb\xe5\x0e\x86\xbc\x8f\x1e\x75",
+ "\x90\x3d\x84\xc0\x27\x56\xea\x14",
+ "\xee\xf2\x7a\x8e\x90\xca\x23\xf7",
+ "\xe5\x45\xbe\x49\x61\xca\x29\xa1",
+ "\xdb\x9b\xc2\x57\x7f\xcc\x2a\x3f",
+ "\x94\x47\xbe\x2c\xf5\xe9\x9a\x69",
+ "\x9c\xd3\x8d\x96\xf0\xb3\xc1\x4b",
+ "\xbd\x61\x79\xa7\x1d\xc9\x6d\xbb",
+ "\x98\xee\xa2\x1a\xf2\x5c\xd6\xbe",
+ "\xc7\x67\x3b\x2e\xb0\xcb\xf2\xd0",
+ "\x88\x3e\xa3\xe3\x95\x67\x53\x93",
+ "\xc8\xce\x5c\xcd\x8c\x03\x0c\xa8",
+ "\x94\xaf\x49\xf6\xc6\x50\xad\xb8",
+ "\xea\xb8\x85\x8a\xde\x92\xe1\xbc",
+ "\xf3\x15\xbb\x5b\xb8\x35\xd8\x17",
+ "\xad\xcf\x6b\x07\x63\x61\x2e\x2f",
+ "\xa5\xc9\x1d\xa7\xac\xaa\x4d\xde",
+ "\x71\x65\x95\x87\x66\x50\xa2\xa6",
+ "\x28\xef\x49\x5c\x53\xa3\x87\xad",
+ "\x42\xc3\x41\xd8\xfa\x92\xd8\x32",
+ "\xce\x7c\xf2\x72\x2f\x51\x27\x71",
+ "\xe3\x78\x59\xf9\x46\x23\xf3\xa7",
+ "\x38\x12\x05\xbb\x1a\xb0\xe0\x12",
+ "\xae\x97\xa1\x0f\xd4\x34\xe0\x15",
+ "\xb4\xa3\x15\x08\xbe\xff\x4d\x31",
+ "\x81\x39\x62\x29\xf0\x90\x79\x02",
+ "\x4d\x0c\xf4\x9e\xe5\xd4\xdc\xca",
+ "\x5c\x73\x33\x6a\x76\xd8\xbf\x9a",
+ "\xd0\xa7\x04\x53\x6b\xa9\x3e\x0e",
+ "\x92\x59\x58\xfc\xd6\x42\x0c\xad",
+ "\xa9\x15\xc2\x9b\xc8\x06\x73\x18",
+ "\x95\x2b\x79\xf3\xbc\x0a\xa6\xd4",
+ "\xf2\x1d\xf2\xe4\x1d\x45\x35\xf9",
+ "\x87\x57\x75\x19\x04\x8f\x53\xa9",
+ "\x10\xa5\x6c\xf5\xdf\xcd\x9a\xdb",
+ "\xeb\x75\x09\x5c\xcd\x98\x6c\xd0",
+ "\x51\xa9\xcb\x9e\xcb\xa3\x12\xe6",
+ "\x96\xaf\xad\xfc\x2c\xe6\x66\xc7",
+ "\x72\xfe\x52\x97\x5a\x43\x64\xee",
+ "\x5a\x16\x45\xb2\x76\xd5\x92\xa1",
+ "\xb2\x74\xcb\x8e\xbf\x87\x87\x0a",
+ "\x6f\x9b\xb4\x20\x3d\xe7\xb3\x81",
+ "\xea\xec\xb2\xa3\x0b\x22\xa8\x7f",
+ "\x99\x24\xa4\x3c\xc1\x31\x57\x24",
+ "\xbd\x83\x8d\x3a\xaf\xbf\x8d\xb7",
+ "\x0b\x1a\x2a\x32\x65\xd5\x1a\xea",
+ "\x13\x50\x79\xa3\x23\x1c\xe6\x60",
+ "\x93\x2b\x28\x46\xe4\xd7\x06\x66",
+ "\xe1\x91\x5f\x5c\xb1\xec\xa4\x6c",
+ "\xf3\x25\x96\x5c\xa1\x6d\x62\x9f",
+ "\x57\x5f\xf2\x8e\x60\x38\x1b\xe5",
+ "\x72\x45\x06\xeb\x4c\x32\x8a\x95",
+ };
+
+ const siphash = SipHash64(2, 4);
+
+ var buffer: [64]u8 = undefined;
+ for (vectors) |vector, i| {
+ buffer[i] = @intCast(u8, i);
+
+ const expected = mem.readIntLittle(u64, &vector);
+ testing.expectEqual(siphash.hash(test_key, buffer[0..i]), expected);
+ }
+}
+
+test "siphash128-2-4 sanity" {
+ const vectors = [_][16]u8{
+ "\xa3\x81\x7f\x04\xba\x25\xa8\xe6\x6d\xf6\x72\x14\xc7\x55\x02\x93",
+ "\xda\x87\xc1\xd8\x6b\x99\xaf\x44\x34\x76\x59\x11\x9b\x22\xfc\x45",
+ "\x81\x77\x22\x8d\xa4\xa4\x5d\xc7\xfc\xa3\x8b\xde\xf6\x0a\xff\xe4",
+ "\x9c\x70\xb6\x0c\x52\x67\xa9\x4e\x5f\x33\xb6\xb0\x29\x85\xed\x51",
+ "\xf8\x81\x64\xc1\x2d\x9c\x8f\xaf\x7d\x0f\x6e\x7c\x7b\xcd\x55\x79",
+ "\x13\x68\x87\x59\x80\x77\x6f\x88\x54\x52\x7a\x07\x69\x0e\x96\x27",
+ "\x14\xee\xca\x33\x8b\x20\x86\x13\x48\x5e\xa0\x30\x8f\xd7\xa1\x5e",
+ "\xa1\xf1\xeb\xbe\xd8\xdb\xc1\x53\xc0\xb8\x4a\xa6\x1f\xf0\x82\x39",
+ "\x3b\x62\xa9\xba\x62\x58\xf5\x61\x0f\x83\xe2\x64\xf3\x14\x97\xb4",
+ "\x26\x44\x99\x06\x0a\xd9\xba\xab\xc4\x7f\x8b\x02\xbb\x6d\x71\xed",
+ "\x00\x11\x0d\xc3\x78\x14\x69\x56\xc9\x54\x47\xd3\xf3\xd0\xfb\xba",
+ "\x01\x51\xc5\x68\x38\x6b\x66\x77\xa2\xb4\xdc\x6f\x81\xe5\xdc\x18",
+ "\xd6\x26\xb2\x66\x90\x5e\xf3\x58\x82\x63\x4d\xf6\x85\x32\xc1\x25",
+ "\x98\x69\xe2\x47\xe9\xc0\x8b\x10\xd0\x29\x93\x4f\xc4\xb9\x52\xf7",
+ "\x31\xfc\xef\xac\x66\xd7\xde\x9c\x7e\xc7\x48\x5f\xe4\x49\x49\x02",
+ "\x54\x93\xe9\x99\x33\xb0\xa8\x11\x7e\x08\xec\x0f\x97\xcf\xc3\xd9",
+ "\x6e\xe2\xa4\xca\x67\xb0\x54\xbb\xfd\x33\x15\xbf\x85\x23\x05\x77",
+ "\x47\x3d\x06\xe8\x73\x8d\xb8\x98\x54\xc0\x66\xc4\x7a\xe4\x77\x40",
+ "\xa4\x26\xe5\xe4\x23\xbf\x48\x85\x29\x4d\xa4\x81\xfe\xae\xf7\x23",
+ "\x78\x01\x77\x31\xcf\x65\xfa\xb0\x74\xd5\x20\x89\x52\x51\x2e\xb1",
+ "\x9e\x25\xfc\x83\x3f\x22\x90\x73\x3e\x93\x44\xa5\xe8\x38\x39\xeb",
+ "\x56\x8e\x49\x5a\xbe\x52\x5a\x21\x8a\x22\x14\xcd\x3e\x07\x1d\x12",
+ "\x4a\x29\xb5\x45\x52\xd1\x6b\x9a\x46\x9c\x10\x52\x8e\xff\x0a\xae",
+ "\xc9\xd1\x84\xdd\xd5\xa9\xf5\xe0\xcf\x8c\xe2\x9a\x9a\xbf\x69\x1c",
+ "\x2d\xb4\x79\xae\x78\xbd\x50\xd8\x88\x2a\x8a\x17\x8a\x61\x32\xad",
+ "\x8e\xce\x5f\x04\x2d\x5e\x44\x7b\x50\x51\xb9\xea\xcb\x8d\x8f\x6f",
+ "\x9c\x0b\x53\xb4\xb3\xc3\x07\xe8\x7e\xae\xe0\x86\x78\x14\x1f\x66",
+ "\xab\xf2\x48\xaf\x69\xa6\xea\xe4\xbf\xd3\xeb\x2f\x12\x9e\xeb\x94",
+ "\x06\x64\xda\x16\x68\x57\x4b\x88\xb9\x35\xf3\x02\x73\x58\xae\xf4",
+ "\xaa\x4b\x9d\xc4\xbf\x33\x7d\xe9\x0c\xd4\xfd\x3c\x46\x7c\x6a\xb7",
+ "\xea\x5c\x7f\x47\x1f\xaf\x6b\xde\x2b\x1a\xd7\xd4\x68\x6d\x22\x87",
+ "\x29\x39\xb0\x18\x32\x23\xfa\xfc\x17\x23\xde\x4f\x52\xc4\x3d\x35",
+ "\x7c\x39\x56\xca\x5e\xea\xfc\x3e\x36\x3e\x9d\x55\x65\x46\xeb\x68",
+ "\x77\xc6\x07\x71\x46\xf0\x1c\x32\xb6\xb6\x9d\x5f\x4e\xa9\xff\xcf",
+ "\x37\xa6\x98\x6c\xb8\x84\x7e\xdf\x09\x25\xf0\xf1\x30\x9b\x54\xde",
+ "\xa7\x05\xf0\xe6\x9d\xa9\xa8\xf9\x07\x24\x1a\x2e\x92\x3c\x8c\xc8",
+ "\x3d\xc4\x7d\x1f\x29\xc4\x48\x46\x1e\x9e\x76\xed\x90\x4f\x67\x11",
+ "\x0d\x62\xbf\x01\xe6\xfc\x0e\x1a\x0d\x3c\x47\x51\xc5\xd3\x69\x2b",
+ "\x8c\x03\x46\x8b\xca\x7c\x66\x9e\xe4\xfd\x5e\x08\x4b\xbe\xe7\xb5",
+ "\x52\x8a\x5b\xb9\x3b\xaf\x2c\x9c\x44\x73\xcc\xe5\xd0\xd2\x2b\xd9",
+ "\xdf\x6a\x30\x1e\x95\xc9\x5d\xad\x97\xae\x0c\xc8\xc6\x91\x3b\xd8",
+ "\x80\x11\x89\x90\x2c\x85\x7f\x39\xe7\x35\x91\x28\x5e\x70\xb6\xdb",
+ "\xe6\x17\x34\x6a\xc9\xc2\x31\xbb\x36\x50\xae\x34\xcc\xca\x0c\x5b",
+ "\x27\xd9\x34\x37\xef\xb7\x21\xaa\x40\x18\x21\xdc\xec\x5a\xdf\x89",
+ "\x89\x23\x7d\x9d\xed\x9c\x5e\x78\xd8\xb1\xc9\xb1\x66\xcc\x73\x42",
+ "\x4a\x6d\x80\x91\xbf\x5e\x7d\x65\x11\x89\xfa\x94\xa2\x50\xb1\x4c",
+ "\x0e\x33\xf9\x60\x55\xe7\xae\x89\x3f\xfc\x0e\x3d\xcf\x49\x29\x02",
+ "\xe6\x1c\x43\x2b\x72\x0b\x19\xd1\x8e\xc8\xd8\x4b\xdc\x63\x15\x1b",
+ "\xf7\xe5\xae\xf5\x49\xf7\x82\xcf\x37\x90\x55\xa6\x08\x26\x9b\x16",
+ "\x43\x8d\x03\x0f\xd0\xb7\xa5\x4f\xa8\x37\xf2\xad\x20\x1a\x64\x03",
+ "\xa5\x90\xd3\xee\x4f\xbf\x04\xe3\x24\x7e\x0d\x27\xf2\x86\x42\x3f",
+ "\x5f\xe2\xc1\xa1\x72\xfe\x93\xc4\xb1\x5c\xd3\x7c\xae\xf9\xf5\x38",
+ "\x2c\x97\x32\x5c\xbd\x06\xb3\x6e\xb2\x13\x3d\xd0\x8b\x3a\x01\x7c",
+ "\x92\xc8\x14\x22\x7a\x6b\xca\x94\x9f\xf0\x65\x9f\x00\x2a\xd3\x9e",
+ "\xdc\xe8\x50\x11\x0b\xd8\x32\x8c\xfb\xd5\x08\x41\xd6\x91\x1d\x87",
+ "\x67\xf1\x49\x84\xc7\xda\x79\x12\x48\xe3\x2b\xb5\x92\x25\x83\xda",
+ "\x19\x38\xf2\xcf\x72\xd5\x4e\xe9\x7e\x94\x16\x6f\xa9\x1d\x2a\x36",
+ "\x74\x48\x1e\x96\x46\xed\x49\xfe\x0f\x62\x24\x30\x16\x04\x69\x8e",
+ "\x57\xfc\xa5\xde\x98\xa9\xd6\xd8\x00\x64\x38\xd0\x58\x3d\x8a\x1d",
+ "\x9f\xec\xde\x1c\xef\xdc\x1c\xbe\xd4\x76\x36\x74\xd9\x57\x53\x59",
+ "\xe3\x04\x0c\x00\xeb\x28\xf1\x53\x66\xca\x73\xcb\xd8\x72\xe7\x40",
+ "\x76\x97\x00\x9a\x6a\x83\x1d\xfe\xcc\xa9\x1c\x59\x93\x67\x0f\x7a",
+ "\x58\x53\x54\x23\x21\xf5\x67\xa0\x05\xd5\x47\xa4\xf0\x47\x59\xbd",
+ "\x51\x50\xd1\x77\x2f\x50\x83\x4a\x50\x3e\x06\x9a\x97\x3f\xbd\x7c",
+ };
+
+ const siphash = SipHash128(2, 4);
+
+ var buffer: [64]u8 = undefined;
+ for (vectors) |vector, i| {
+ buffer[i] = @intCast(u8, i);
+
+ const expected = mem.readIntLittle(u128, &vector);
+ testing.expectEqual(siphash.hash(test_key, buffer[0..i]), expected);
+ }
+}
+
+test "iterative non-divisible update" {
+ var buf: [1024]u8 = undefined;
+ for (buf) |*e, i| {
+ e.* = @truncate(u8, i);
+ }
+
+ const key = "0x128dad08f12307"; // the 16 ASCII bytes of this literal serve as the 16-byte key
+ const Siphash = SipHash64(2, 4);
+
+ var end: usize = 9;
+ while (end < buf.len) : (end += 9) {
+ const non_iterative_hash = Siphash.hash(key, buf[0..end]);
+
+ var sip = Siphash.init(key);
+ var i: usize = 0;
+ while (i < end) : (i += 7) {
+ sip.update(buf[i..std.math.min(i + 7, end)]);
+ }
+ const iterative_hash = sip.final();
+
+ std.testing.expectEqual(iterative_hash, non_iterative_hash);
+ }
+}
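+
+// A keyed one-shot usage sketch (illustrative only; the key value is an
+// arbitrary stand-in): SipHash takes a 16-byte secret key, and a fixed key
+// makes the digest deterministic across calls.
+test "siphash keyed one-shot sketch" {
+ const key = "0123456789abcdef"; // any 16-byte secret
+ testing.expectEqual(SipHash64(2, 4).hash(key, "hello"), SipHash64(2, 4).hash(key, "hello"));
+}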
diff --git a/lib/std/hash/wyhash.zig b/lib/std/hash/wyhash.zig
new file mode 100644
index 0000000000..7e35ccc6d2
--- /dev/null
+++ b/lib/std/hash/wyhash.zig
@@ -0,0 +1,231 @@
+const std = @import("std");
+const mem = std.mem;
+
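+// Multiplication constants from the reference wyhash implementation.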
+const primes = [_]u64{
+ 0xa0761d6478bd642f,
+ 0xe7037ed1a0b428db,
+ 0x8ebc6af09c88c6e3,
+ 0x589965cc75374cc3,
+ 0x1d8e4e27c47d124f,
+};
+
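+// Reads `bytes` bytes from `data` as a little-endian integer, widened to u64.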
+fn read_bytes(comptime bytes: u8, data: []const u8) u64 {
+ const T = @IntType(false, 8 * bytes);
+ return mem.readIntSliceLittle(T, data[0..bytes]);
+}
+
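+// Reads 8 bytes as two little-endian u32 words, with the first word placed in
+// the upper half of the result.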
+fn read_8bytes_swapped(data: []const u8) u64 {
+ return (read_bytes(4, data) << 32 | read_bytes(4, data[4..]));
+}
+
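+// 64x64 -> 128-bit multiply, folded to 64 bits by XORing the high and low halves.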
+fn mum(a: u64, b: u64) u64 {
+ var r = std.math.mulWide(u64, a, b);
+ r = (r >> 64) ^ r;
+ return @truncate(u64, r);
+}
+
+fn mix0(a: u64, b: u64, seed: u64) u64 {
+ return mum(a ^ seed ^ primes[0], b ^ seed ^ primes[1]);
+}
+
+fn mix1(a: u64, b: u64, seed: u64) u64 {
+ return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]);
+}
+
+// Wyhash variant that keeps no internal buffer for partial input blocks.
+// Dropping that state maximizes speed for short keys, which go through the
+// non-iterative `hash` API that the public Wyhash exposes below.
+const WyhashStateless = struct {
+ seed: u64,
+ msg_len: usize,
+
+ pub fn init(seed: u64) WyhashStateless {
+ return WyhashStateless{
+ .seed = seed,
+ .msg_len = 0,
+ };
+ }
+
+ fn round(self: *WyhashStateless, b: []const u8) void {
+ std.debug.assert(b.len == 32);
+
+ self.seed = mix0(
+ read_bytes(8, b[0..]),
+ read_bytes(8, b[8..]),
+ self.seed,
+ ) ^ mix1(
+ read_bytes(8, b[16..]),
+ read_bytes(8, b[24..]),
+ self.seed,
+ );
+ }
+
+ pub fn update(self: *WyhashStateless, b: []const u8) void {
+ std.debug.assert(b.len % 32 == 0);
+
+ var off: usize = 0;
+ while (off < b.len) : (off += 32) {
+ @inlineCall(self.round, b[off .. off + 32]);
+ }
+
+ self.msg_len += b.len;
+ }
+
+ pub fn final(self: *WyhashStateless, b: []const u8) u64 {
+ std.debug.assert(b.len < 32);
+
+ const seed = self.seed;
+ const rem_len = @intCast(u5, b.len);
+ const rem_key = b[0..rem_len];
+
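+ // Assemble the remaining 0-31 tail bytes with as few loads as possible
+ // before the final mixes; one switch arm per possible tail length.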
+ self.seed = switch (rem_len) {
+ 0 => seed,
+ 1 => mix0(read_bytes(1, rem_key), primes[4], seed),
+ 2 => mix0(read_bytes(2, rem_key), primes[4], seed),
+ 3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed),
+ 4 => mix0(read_bytes(4, rem_key), primes[4], seed),
+ 5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed),
+ 6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed),
+ 7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed),
+ 8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed),
+ 9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed),
+ 10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed),
+ 11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed),
+ 12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed),
+ 13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed),
+ 14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed),
+ 15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed),
+ 16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed),
+ 17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed),
+ 18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed),
+ 19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed),
+ 20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed),
+ 21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed),
+ 22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed),
+ 23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed),
+ 24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed),
+ 25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed),
+ 26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed),
+ 27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed),
+ 28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed),
+ 29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed),
+ 30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed),
+ 31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed),
+ };
+
+ self.msg_len += b.len;
+ return mum(self.seed ^ self.msg_len, primes[4]);
+ }
+
+ pub fn hash(seed: u64, input: []const u8) u64 {
+ const aligned_len = input.len - (input.len % 32);
+
+ var c = WyhashStateless.init(seed);
+ @inlineCall(c.update, input[0..aligned_len]);
+ return @inlineCall(c.final, input[aligned_len..]);
+ }
+};
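+
+// Usage sketch for the stateless core (illustrative; `seed`, `input`, and
+// `aligned_len` are placeholders): `update` may only be given whole 32-byte
+// blocks, so the caller buffers the tail and hands it to `final`:
+//
+// var c = WyhashStateless.init(seed);
+// c.update(input[0..aligned_len]); // aligned_len % 32 == 0
+// const h = c.final(input[aligned_len..]); // remaining length < 32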
+
+/// Fast non-cryptographic 64-bit hash function.
+/// See https://github.com/wangyi-fudan/wyhash
+pub const Wyhash = struct {
+ state: WyhashStateless,
+
+ buf: [32]u8,
+ buf_len: usize,
+
+ pub fn init(seed: u64) Wyhash {
+ return Wyhash{
+ .state = WyhashStateless.init(seed),
+ .buf = undefined,
+ .buf_len = 0,
+ };
+ }
+
+ pub fn update(self: *Wyhash, b: []const u8) void {
+ var off: usize = 0;
+
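+ // Drain a partially filled buffer first, then feed whole 32-byte blocks
+ // straight to the stateless core; whatever is left is stashed for later.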
+ if (self.buf_len != 0 and self.buf_len + b.len >= 32) {
+ off += 32 - self.buf_len;
+ mem.copy(u8, self.buf[self.buf_len..], b[0..off]);
+ self.state.update(self.buf[0..]);
+ self.buf_len = 0;
+ }
+
+ const remain_len = b.len - off;
+ const aligned_len = remain_len - (remain_len % 32);
+ self.state.update(b[off .. off + aligned_len]);
+
+ mem.copy(u8, self.buf[self.buf_len..], b[off + aligned_len ..]);
+ self.buf_len += b[off + aligned_len ..].len;
+ }
+
+ pub fn final(self: *Wyhash) u64 {
+ const rem_key = self.buf[0..self.buf_len];
+
+ return self.state.final(rem_key);
+ }
+
+ pub fn hash(seed: u64, input: []const u8) u64 {
+ return WyhashStateless.hash(seed, input);
+ }
+};
+
+const expectEqual = std.testing.expectEqual;
+
+test "test vectors" {
+ const hash = Wyhash.hash;
+
+ expectEqual(hash(0, ""), 0x0);
+ expectEqual(hash(1, "a"), 0xbed235177f41d328);
+ expectEqual(hash(2, "abc"), 0xbe348debe59b27c3);
+ expectEqual(hash(3, "message digest"), 0x37320f657213a290);
+ expectEqual(hash(4, "abcdefghijklmnopqrstuvwxyz"), 0xd0b270e1d8a7019c);
+ expectEqual(hash(5, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0x602a1894d3bbfe7f);
+ expectEqual(hash(6, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0x829e9c148b75970e);
+}
+
+test "test vectors streaming" {
+ var wh = Wyhash.init(5);
+ for ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") |e| {
+ wh.update(mem.asBytes(&e));
+ }
+ expectEqual(wh.final(), 0x602a1894d3bbfe7f);
+
+ const pattern = "1234567890";
+ const count = 8;
+ const result = 0x829e9c148b75970e;
+ expectEqual(Wyhash.hash(6, pattern ** count), result);
+
+ wh = Wyhash.init(6);
+ var i: u32 = 0;
+ while (i < count) : (i += 1) {
+ wh.update(pattern);
+ }
+ expectEqual(wh.final(), result);
+}
+
+test "iterative non-divisible update" {
+ var buf: [8192]u8 = undefined;
+ for (buf) |*e, i| {
+ e.* = @truncate(u8, i);
+ }
+
+ const seed = 0x128dad08f;
+
+ var end: usize = 32;
+ while (end < buf.len) : (end += 32) {
+ const non_iterative_hash = Wyhash.hash(seed, buf[0..end]);
+
+ var wy = Wyhash.init(seed);
+ var i: usize = 0;
+ while (i < end) : (i += 33) {
+ wy.update(buf[i..std.math.min(i + 33, end)]);
+ }
+ const iterative_hash = wy.final();
+
+ std.testing.expectEqual(iterative_hash, non_iterative_hash);
+ }
+}