diff options
| author | Marc Tiehuis <marctiehuis@gmail.com> | 2018-03-10 10:00:07 +1300 |
|---|---|---|
| committer | Marc Tiehuis <marctiehuis@gmail.com> | 2018-03-10 10:00:07 +1300 |
| commit | 7a893691c0aedf4d7ae68a9eb06800e4094381cc (patch) | |
| tree | a1f4315923fb78ff938edeb6d5bd124b47a1bb5d /std | |
| parent | 5a7a0e8518bcb9e63c06dba21d9c9e2bb0827330 (diff) | |
| download | zig-7a893691c0aedf4d7ae68a9eb06800e4094381cc.tar.gz zig-7a893691c0aedf4d7ae68a9eb06800e4094381cc.zip | |
Unroll Sha3 inner loop
Issue #699 has since been fixed. Nearly a 3x perf improvement.
Using --release-fast.
Sha3_256 (before): 96 Mb/s
Sha3_256 (after): 267 Mb/s
Sha3_512 (before): 53 Mb/s
Sha3_512 (after): 142 Mb/s
There were no real gains from unrolling the other initialization loops in the
crypto functions, so they have been left as is.
Diffstat (limited to 'std')
| -rw-r--r-- | std/crypto/md5.zig | 1 | ||||
| -rw-r--r-- | std/crypto/sha2.zig | 2 | ||||
| -rw-r--r-- | std/crypto/sha3.zig | 21 |
3 files changed, 10 insertions, 14 deletions
diff --git a/std/crypto/md5.zig b/std/crypto/md5.zig index 313bff91bf..26700cd65b 100644 --- a/std/crypto/md5.zig +++ b/std/crypto/md5.zig @@ -108,7 +108,6 @@ pub const Md5 = struct { var s: [16]u32 = undefined; - // ERROR: cannot unroll this at comptime var i: usize = 0; while (i < 16) : (i += 1) { // NOTE: Performing or's separately improves perf by ~10% diff --git a/std/crypto/sha2.zig b/std/crypto/sha2.zig index bfb604d456..113bab926b 100644 --- a/std/crypto/sha2.zig +++ b/std/crypto/sha2.zig @@ -156,7 +156,6 @@ fn Sha2_32(comptime params: Sha2Params32) type { return struct { var s: [64]u32 = undefined; - // ERROR: Cannot unroll at compile-time. var i: usize = 0; while (i < 16) : (i += 1) { s[i] = 0; @@ -472,7 +471,6 @@ fn Sha2_64(comptime params: Sha2Params64) type { return struct { var s: [80]u64 = undefined; - // ERROR: Cannot unroll at compile-time. var i: usize = 0; while (i < 16) : (i += 1) { s[i] = 0; diff --git a/std/crypto/sha3.zig b/std/crypto/sha3.zig index 759ddf43aa..6e6a86b3d5 100644 --- a/std/crypto/sha3.zig +++ b/std/crypto/sha3.zig @@ -123,35 +123,34 @@ fn keccak_f(comptime F: usize, d: []u8) void { *r = mem.readIntLE(u64, d[8*i .. 8*i + 8]); } - var x: usize = 0; - var y: usize = 0; - // TODO: Cannot unroll all loops here due to comptime differences. 
- inline for (RC[0..no_rounds]) |round| { + comptime var x: usize = 0; + comptime var y: usize = 0; + for (RC[0..no_rounds]) |round| { // theta - x = 0; while (x < 5) : (x += 1) { + x = 0; inline while (x < 5) : (x += 1) { c[x] = s[x] ^ s[x+5] ^ s[x+10] ^ s[x+15] ^ s[x+20]; } - x = 0; while (x < 5) : (x += 1) { + x = 0; inline while (x < 5) : (x += 1) { t[0] = c[M5[x+4]] ^ math.rotl(u64, c[M5[x+1]], usize(1)); - y = 0; while (y < 5) : (y += 1) { + y = 0; inline while (y < 5) : (y += 1) { s[x + y*5] ^= t[0]; } } // rho+pi t[0] = s[1]; - x = 0; while (x < 24) : (x += 1) { + x = 0; inline while (x < 24) : (x += 1) { c[0] = s[PIL[x]]; s[PIL[x]] = math.rotl(u64, t[0], ROTC[x]); t[0] = c[0]; } // chi - y = 0; while (y < 5) : (y += 1) { - x = 0; while (x < 5) : (x += 1) { + y = 0; inline while (y < 5) : (y += 1) { + x = 0; inline while (x < 5) : (x += 1) { c[x] = s[x + y*5]; } - x = 0; while (x < 5) : (x += 1) { + x = 0; inline while (x < 5) : (x += 1) { s[x + y*5] = c[x] ^ (~c[M5[x+1]] & c[M5[x+2]]); } } |
