diff options
| author | Frank Denis <github@pureftpd.org> | 2020-08-21 13:56:11 +0200 |
|---|---|---|
| committer | Frank Denis <github@pureftpd.org> | 2020-08-21 16:30:52 +0200 |
| commit | c9218f17197a0d778d9c89787ee0807dc0d00a91 (patch) | |
| tree | 6c1000e5ee38ee48ac37f14b52215b4d04db5be9 /lib/std/crypto/poly1305.zig | |
| parent | 243b5c7a889a3cbc86b5b0720b9ed1cdc6c29835 (diff) | |
| download | zig-c9218f17197a0d778d9c89787ee0807dc0d00a91.tar.gz zig-c9218f17197a0d778d9c89787ee0807dc0d00a91.zip | |
Make poly1305 faster
Diffstat (limited to 'lib/std/crypto/poly1305.zig')
| -rw-r--r-- | lib/std/crypto/poly1305.zig | 351 |
1 files changed, 160 insertions, 191 deletions
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
const std = @import("std");
const mem = std.mem;

/// Poly1305 message authentication code.
///
/// The accumulator and the multiplier are kept as three 64-bit limbs located
/// at bit offsets 0, 44 and 88, and products are computed in 128-bit integers.
///
/// Usage: `init` with a key, call `update` any number of times with
/// arbitrarily sized chunks, then `final` to obtain the tag. `create` does all
/// of that in one call.
pub const Poly1305 = struct {
    pub const block_size: usize = 16;
    pub const mac_length = 16;
    pub const minimum_key_length = 32;

    // constant multiplier (from the secret key), limbs at bit offsets 0/44/88
    r: [3]u64,
    // accumulated hash, limbs of 44, 44 and 42 bits
    h: [3]u64 = [_]u64{ 0, 0, 0 },
    // random number added at the end (from the secret key)
    pad: [2]u64,
    // how many bytes are waiting to be processed in a partial block
    leftover: usize = 0,
    // partial block buffer
    buf: [block_size]u8 align(16) = undefined,

    /// Creates a new authenticator state from `key`.
    ///
    /// `key` must hold at least `minimum_key_length` bytes. The first 16 bytes
    /// are loaded as the multiplier `r` (with some bits masked off), the next
    /// 16 bytes as the value added to the hash in `final`.
    pub fn init(key: []const u8) Poly1305 {
        std.debug.assert(key.len >= minimum_key_length);
        const t0 = mem.readIntLittle(u64, key[0..8]);
        const t1 = mem.readIntLittle(u64, key[8..16]);
        return Poly1305{
            // split the 128-bit value into limbs and clear the bits that
            // must be zero in `r`
            .r = [_]u64{
                t0 & 0xffc0fffffff,
                ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff,
                ((t1 >> 24)) & 0x00ffffffc0f,
            },
            .pad = [_]u64{
                mem.readIntLittle(u64, key[16..24]),
                mem.readIntLittle(u64, key[24..32]),
            },
        };
    }

    // Absorbs every complete `block_size`-byte block of `m` into `st.h`:
    // for each block, h = (h + block) * r, followed by a partial reduction.
    // Trailing bytes that do not form a complete block are ignored.
    //
    // `last` must be true only for the padded final block built by `final`,
    // which already contains its own terminating 0x01 byte; in that case the
    // implicit 2^128 bit is not added.
    fn blocks(st: *Poly1305, m: []const u8, last: comptime bool) void {
        // 2^128 expressed inside the top limb (which starts at bit 88)
        const hibit: u64 = if (last) 0 else 1 << 40;
        const r0 = st.r[0];
        const r1 = st.r[1];
        const r2 = st.r[2];
        // precomputed multiples of r used for the limbs that wrap around
        const s1 = r1 * (5 << 2);
        const s2 = r2 * (5 << 2);
        var i: usize = 0;
        while (i + block_size <= m.len) : (i += block_size) {
            // h += m[i]
            const t0 = mem.readIntLittle(u64, m[i..][0..8]);
            const t1 = mem.readIntLittle(u64, m[i + 8 ..][0..8]);
            st.h[0] += t0 & 0xfffffffffff;
            st.h[1] += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffff;
            st.h[2] += (((t1 >> 24)) & 0x3ffffffffff) | hibit;

            // h *= r
            const d0 = @as(u128, st.h[0]) * @as(u128, r0) + @as(u128, st.h[1]) * @as(u128, s2) + @as(u128, st.h[2]) * @as(u128, s1);
            var d1 = @as(u128, st.h[0]) * @as(u128, r1) + @as(u128, st.h[1]) * @as(u128, r0) + @as(u128, st.h[2]) * @as(u128, s2);
            var d2 = @as(u128, st.h[0]) * @as(u128, r2) + @as(u128, st.h[1]) * @as(u128, r1) + @as(u128, st.h[2]) * @as(u128, r0);

            // partial reduction
            var carry = d0 >> 44;
            st.h[0] = @truncate(u64, d0) & 0xfffffffffff;
            d1 += carry;
            carry = @intCast(u64, d1 >> 44);
            st.h[1] = @truncate(u64, d1) & 0xfffffffffff;
            d2 += carry;
            carry = @intCast(u64, d2 >> 42);
            st.h[2] = @truncate(u64, d2) & 0x3ffffffffff;
            // whatever overflows the top limb is folded back in, times 5
            st.h[0] += @truncate(u64, carry) * 5;
            carry = st.h[0] >> 44;
            st.h[0] &= 0xfffffffffff;
            st.h[1] += @truncate(u64, carry);
        }
    }

    /// Feeds `m` into the authenticator.
    ///
    /// May be called repeatedly with chunks of any length, including zero;
    /// bytes that do not complete a block are buffered until more input
    /// arrives or `final` is called.
    pub fn update(st: *Poly1305, m: []const u8) void {
        var mb = m;

        // handle leftover
        if (st.leftover > 0) {
            const want = std.math.min(block_size - st.leftover, mb.len);
            mem.copy(u8, st.buf[st.leftover..], mb[0..want]);
            mb = mb[want..];
            st.leftover += want;
            // The buffered block is still incomplete: wait for more input.
            // (`leftover` can never exceed `block_size`, so this has to be a
            // `<` test; otherwise a partially filled buffer would be hashed
            // as if it were a full block.)
            if (st.leftover < block_size) {
                return;
            }
            st.blocks(&st.buf, false);
            st.leftover = 0;
        }

        // process full blocks
        if (mb.len >= block_size) {
            // round down to a multiple of block_size
            const want = mb.len & ~(block_size - 1);
            st.blocks(mb[0..want], false);
            mb = mb[want..];
        }

        // store leftover
        if (mb.len > 0) {
            mem.copy(u8, st.buf[st.leftover..], mb);
            st.leftover += mb.len;
        }
    }

    /// Finishes the computation and writes the `mac_length`-byte tag to the
    /// start of `out`, which must be at least `mac_length` bytes long.
    ///
    /// The whole state is wiped afterwards, so `st` must not be used again
    /// without being re-initialized.
    pub fn final(st: *Poly1305, out: []u8) void {
        std.debug.assert(out.len >= mac_length);
        if (st.leftover > 0) {
            // pad the partial block with a single 0x01 byte followed by zeros
            var i = st.leftover;
            st.buf[i] = 1;
            i += 1;
            while (i < block_size) : (i += 1) {
                st.buf[i] = 0;
            }
            st.blocks(&st.buf, true);
        }
        // fully carry h
        var carry = st.h[1] >> 44;
        st.h[1] &= 0xfffffffffff;
        st.h[2] += carry;
        carry = st.h[2] >> 42;
        st.h[2] &= 0x3ffffffffff;
        st.h[0] += carry * 5;
        carry = st.h[0] >> 44;
        st.h[0] &= 0xfffffffffff;
        st.h[1] += carry;
        carry = st.h[1] >> 44;
        st.h[1] &= 0xfffffffffff;
        st.h[2] += carry;
        carry = st.h[2] >> 42;
        st.h[2] &= 0x3ffffffffff;
        st.h[0] += carry * 5;
        carry = st.h[0] >> 44;
        st.h[0] &= 0xfffffffffff;
        st.h[1] += carry;

        // compute h + -p
        var g0 = st.h[0] + 5;
        carry = g0 >> 44;
        g0 &= 0xfffffffffff;
        var g1 = st.h[1] + carry;
        carry = g1 >> 44;
        g1 &= 0xfffffffffff;
        var g2 = st.h[2] + carry -% (1 << 42);

        // (hopefully) constant-time select h if h < p, or h + -p if h >= p
        // mask is all ones when g2 did not wrap below zero, all zeros otherwise
        const mask = (g2 >> 63) -% 1;
        g0 &= mask;
        g1 &= mask;
        g2 &= mask;
        const nmask = ~mask;
        st.h[0] = (st.h[0] & nmask) | g0;
        st.h[1] = (st.h[1] & nmask) | g1;
        st.h[2] = (st.h[2] & nmask) | g2;

        // h = (h + pad)
        const t0 = st.pad[0];
        const t1 = st.pad[1];
        st.h[0] += (t0 & 0xfffffffffff);
        carry = (st.h[0] >> 44);
        st.h[0] &= 0xfffffffffff;
        st.h[1] += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + carry;
        carry = (st.h[1] >> 44);
        st.h[1] &= 0xfffffffffff;
        st.h[2] += (((t1 >> 24)) & 0x3ffffffffff) + carry;
        st.h[2] &= 0x3ffffffffff;

        // mac = h % (2^128)
        st.h[0] |= st.h[1] << 44;
        st.h[1] = (st.h[1] >> 20) | (st.h[2] << 24);

        mem.writeIntLittle(u64, out[0..8], st.h[0]);
        mem.writeIntLittle(u64, out[8..16], st.h[1]);

        // Wipe the entire state, not just `r`: `pad` is the other half of the
        // key, and `h` / `buf` hold data derived from the message.
        mem.secureZero(u8, @ptrCast([*]u8, st)[0..@sizeOf(Poly1305)]);
    }

    /// Computes the tag of `msg` under `key` in one call and writes it to the
    /// first `mac_length` bytes of `out`.
    ///
    /// `out` must be at least `mac_length` bytes long and `key` at least
    /// `minimum_key_length` bytes long.
    pub fn create(out: []u8, msg: []const u8, key: []const u8) void {
        std.debug.assert(out.len >= mac_length);
        std.debug.assert(key.len >= minimum_key_length);

        var st = Poly1305.init(key);
        st.update(msg);
        st.final(out);
    }
};
