From 616b23c8159a5b643253299ce2493d3a5abd2b81 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Sat, 25 Sep 2021 00:49:45 +0200
Subject: big ints: split lladd/llsub into carry variants

lladd is now implemented in terms of lladdcarry, which returns the carry limb.
Similarly, llsub is implemented using llsubcarry, which returns the borrow limb.
---
 lib/std/math/big/int.zig | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index b7ea284004..7a1aba1f2f 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -2133,10 +2133,10 @@ fn llnormalize(a: []const Limb) usize {
 }
 
 /// Knuth 4.3.1, Algorithm S.
-fn llsub(r: []Limb, a: []const Limb, b: []const Limb) void {
+fn llsubcarry(r: []Limb, a: []const Limb, b: []const Limb) Limb {
     @setRuntimeSafety(debug_safety);
     assert(a.len != 0 and b.len != 0);
-    assert(a.len > b.len or (a.len == b.len and a[a.len - 1] >= b[b.len - 1]));
+    assert(a.len >= b.len);
     assert(r.len >= a.len);
 
     var i: usize = 0;
@@ -2153,15 +2153,21 @@ fn llsub(r: []Limb, a: []const Limb, b: []const Limb) void {
         borrow = @boolToInt(@subWithOverflow(Limb, a[i], borrow, &r[i]));
     }
 
-    assert(borrow == 0);
+    return borrow;
+}
+
+fn llsub(r: []Limb, a: []const Limb, b: []const Limb) void {
+    @setRuntimeSafety(debug_safety);
+    assert(a.len > b.len or (a.len == b.len and a[a.len - 1] >= b[b.len - 1]));
+    assert(llsubcarry(r, a, b) == 0);
 }
 
 /// Knuth 4.3.1, Algorithm A.
-fn lladd(r: []Limb, a: []const Limb, b: []const Limb) void {
+fn lladdcarry(r: []Limb, a: []const Limb, b: []const Limb) Limb {
     @setRuntimeSafety(debug_safety);
     assert(a.len != 0 and b.len != 0);
     assert(a.len >= b.len);
-    assert(r.len >= a.len + 1);
+    assert(r.len >= a.len);
 
     var i: usize = 0;
     var carry: Limb = 0;
@@ -2177,7 +2183,13 @@ fn lladd(r: []Limb, a: []const Limb, b: []const Limb) void {
         carry = @boolToInt(@addWithOverflow(Limb, a[i], carry, &r[i]));
     }
 
-    r[i] = carry;
+    return carry;
+}
+
+fn lladd(r: []Limb, a: []const Limb, b: []const Limb) void {
+    @setRuntimeSafety(debug_safety);
+    assert(r.len >= a.len + 1);
+    r[a.len] = lladdcarry(r, a, b);
 }
 
 /// Knuth 4.3.1, Exercise 16.
-- 
cgit v1.2.3


From b58cf6dab6ff64c3403d3776a430694534f7b818 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Sun, 26 Sep 2021 07:40:12 +0200
Subject: big ints: 2s complement truncate

---
 lib/std/math/big/int.zig | 107 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 7a1aba1f2f..46ca41dfc2 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -58,6 +58,11 @@ pub fn calcPowLimbsBufferLen(a_bit_count: usize, y: usize) usize {
     return 2 + (a_bit_count * y + (limb_bits - 1)) / limb_bits;
 }
 
+// Compute the number of limbs required to store a 2s-complement number of `bit_count` bits.
+pub fn calcTwosCompLimbCount(bit_count: usize) usize {
+    return std.math.divCeil(usize, bit_count, @bitSizeOf(Limb)) catch unreachable;
+}
+
 /// a + b * c + *carry, sets carry to the overflow bits
 pub fn addMulLimbWithCarry(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb {
     @setRuntimeSafety(debug_safety);
@@ -980,6 +985,91 @@ pub const Mutable = struct {
         r.normalize(r.len);
     }
 
+    /// Truncate an integer to a number of bits, following 2s-complement semantics.
+    /// r may alias a.
+    ///
+    /// Asserts `r` has enough storage to store the result.
+    /// The upper bound is `calcTwosCompLimbCount(a.len)`.
+    pub fn truncate(r: *Mutable, a: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
+        const req_limbs = (bit_count + @bitSizeOf(Limb) - 1) / @bitSizeOf(Limb);
+
+        // Handle 0-bit integers.
+        if (req_limbs == 0 or a.eqZero()) {
+            r.set(0);
+            return;
+        }
+
+        const bit = @truncate(Log2Limb, bit_count - 1);
+        const signmask = @as(Limb, 1) << bit;
+        const mask = (signmask << 1) -% 1;
+
+        if (!a.positive) {
+            // Convert the integer from sign-magnitude into twos-complement.
+            // -x = ~(x - 1)
+            // Note, we simply take req_limbs * @bitSizeOf(Limb) as the
+            // target bit count.
+
+            r.addScalar(a.abs(), -1);
+
+            // Zero-extend the result
+            if (req_limbs > r.len) {
+                mem.set(Limb, r.limbs[r.len .. req_limbs], 0);
+            }
+
+            // Truncate to required number of limbs.
+            assert(r.limbs.len >= req_limbs);
+            r.len = req_limbs;
+
+            // Without truncating, we can already peek at the sign bit of the result here.
+            // Note that it will be 0 if the result is negative, as we did not apply the flip here.
+            // If the result is negative, we have
+            // -(-x & mask)
+            // = ~(~(x - 1) & mask) + 1
+            // = ~(~((x - 1) | ~mask)) + 1
+            // = ((x - 1) | ~mask)) + 1
+            // Note, this is only valid for the target bits and not the upper bits
+            // of the most significant limb. Those still need to be cleared.
+            // Also note that `mask` is zero for all other bits, reducing to the identity.
+            // This means that we still need to use & mask to clear off the upper bits.
+
+            if (signedness == .signed and r.limbs[r.len - 1] & signmask == 0) {
+                // Re-add the one and negate to get the result.
+                r.limbs[r.len - 1] &= mask;
+                // Note, addition cannot require extra limbs here as we did a subtraction before.
+                r.addScalar(r.toConst(), 1);
+                r.normalize(r.len);
+                r.positive = false;
+            } else {
+                llnot(r.limbs[0..r.len]);
+                r.limbs[r.len - 1] &= mask;
+                r.normalize(r.len);
+            }
+        } else {
+            r.copy(a);
+            if (r.len < req_limbs) {
+                // Integer fits within target bits, no wrapping required.
+                return;
+            }
+
+            r.len = req_limbs;
+            r.limbs[r.len - 1] &= mask;
+            r.normalize(r.len);
+
+            if (signedness == .signed and r.limbs[r.len - 1] & signmask != 0) {
+                // Convert 2s-complement back to sign-magnitude.
+                // Sign-extend the upper bits so that they are inverted correctly.
+                r.limbs[r.len - 1] |= ~mask;
+                llnot(r.limbs[0..r.len]);
+
+                // Note, can only overflow if r holds 0xFFF...F which can only happen if
+                // a holds 0.
+                r.addScalar(r.toConst(), 1);
+
+                r.positive = false;
+            }
+        }
+    }
+
     /// Normalize a possible sequence of leading zeros.
     ///
     /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4]
@@ -1941,6 +2031,14 @@ pub const Managed = struct {
             rma.setMetadata(rma_mut.positive, rma_mut.len);
         }
     }
+
+    // r = truncate(Int(signedness, bit_count), a)
+    pub fn truncate(r: *Managed, a: Const, signedness: std.builtin.Signedness, bit_count: usize) !void {
+        try r.ensureCapacity(calcTwosCompLimbCount(a.limbs.len));
+        var m = r.toMutable();
+        m.truncate(a, signedness, bit_count);
+        r.setMetadata(m.positive, m.len);
+    }
 };
 
 /// Knuth 4.3.1, Algorithm M.
@@ -2270,6 +2368,15 @@ fn llshr(r: []Limb, a: []const Limb, shift: usize) void {
     }
 }
 
+// r = ~r
+fn llnot(r: []Limb) void {
+    @setRuntimeSafety(debug_safety);
+
+    for (r) |*elem| {
+        elem.* = ~elem.*;
+    }
+}
+
 // r = a | b with 2s complement semantics.
 // r may alias.
 // a and b must not be 0.
-- 
cgit v1.2.3


From fdb37743fa48bc1ca41a11148dd5d801e9c3a34e Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Mon, 27 Sep 2021 03:12:38 +0200
Subject: big ints: addWrap, subWrap + fix Managed.truncate allocation size

---
 lib/std/math/big/int.zig | 117 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 116 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 46ca41dfc2..59d59a19d3 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -346,6 +346,47 @@ pub const Mutable = struct {
         }
     }
 
+    /// r = a + b with 2s-complement wrapping semantics.
+    ///
+    /// r, a and b may be aliases
+    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
+    /// r is `calcTwosCompLimbCount(bit_count)`.
+    pub fn addWrap(r: *Mutable, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
+        const req_limbs = calcTwosCompLimbCount(bit_count);
+
+        // We can ignore the upper bits here, those results will be discarded anyway.
+        const a_limbs = a.limbs[0..math.min(req_limbs, a.limbs.len)];
+        const b_limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)];
+
+        if (a.eqZero()) {
+            r.copy(b);
+        } else if (b.eqZero()) {
+            r.copy(a);
+        } else if (a.positive != b.positive) {
+            if (a.positive) {
+                // (a) + (-b) => a - b
+                r.subWrap(a, b.abs(), signedness, bit_count);
+            } else {
+                // (-a) + (b) => b - a
+                r.subWrap(b, a.abs(), signedness, bit_count);
+            }
+            // Don't need to truncate, subWrap does that for us.
+            return;
+        } else {
+            if (a_limbs.len >= b_limbs.len) {
+                _ = lladdcarry(r.limbs, a_limbs, b_limbs);
+                r.normalize(a_limbs.len);
+            } else {
+                _ = lladdcarry(r.limbs, b_limbs, b_limbs);
+                r.normalize(b_limbs.len);
+            }
+
+            r.positive = a.positive;
+        }
+
+        r.truncate(r.toConst(), signedness, bit_count);
+    }
+
     /// r = a - b
     ///
     /// r, a and b may be aliases.
@@ -389,6 +430,59 @@ pub const Mutable = struct {
         }
     }
 
+    /// r = a - b with 2s-complement wrapping semantics.
+    ///
+    /// r, a and b may be aliases
+    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
+    /// r is `calcTwosCompLimbCount(bit_count)`.
+    pub fn subWrap(r: *Mutable, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
+        const req_limbs = calcTwosCompLimbCount(bit_count);
+
+        // We can ignore the upper bits here, those results will be discarded anyway.
+        // We also don't need to mind order here. Again, overflow is ignored here.
+        const a_limbs = a.limbs[0..math.min(req_limbs, a.limbs.len)];
+        const b_limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)];
+
+        if (a.positive != b.positive) {
+              if (a.positive) {
+                // (a) - (-b) => a + b
+                r.addWrap(a, b.abs(), signedness, bit_count);
+            } else {
+                // (-a) - (b) => -a + -b
+                // Note, we don't do -(a + b) here to avoid a second truncate.
+                r.addWrap(a, b.negate(), signedness, bit_count);
+            }
+            // Don't need to truncate, addWrap does that for us.
+            return;
+        } else if (a.positive) {
+            if (a_limbs.len >= b_limbs.len) {
+                // (a) - (b) => a - b
+                _ = llsubcarry(r.limbs, a_limbs, b_limbs);
+                r.normalize(a_limbs.len);
+                r.positive = true;
+            } else {
+                // (a) - (b) => -b + a => -(b - a)
+                _ = llsubcarry(r.limbs, b_limbs, a_limbs);
+                r.normalize(b_limbs.len);
+                r.positive = false;
+            }
+        } else {
+            if (a_limbs.len >= b_limbs.len) {
+                // (-a) - (-b) => -(a - b)
+                _ = llsubcarry(r.limbs, a_limbs, b_limbs);
+                r.normalize(a_limbs.len);
+                r.positive = false;
+            } else {
+                // (-a) - (-b) => --b + -a => b - a
+                _ = llsubcarry(r.limbs, b_limbs, a_limbs);
+                r.normalize(b_limbs.len);
+                r.positive = true;
+            }
+        }
+
+        r.truncate(r.toConst(), signedness, bit_count);
+    }
+
     /// rma = a * b
     ///
     /// `rma` may alias with `a` or `b`.
@@ -1130,6 +1224,13 @@ pub const Const = struct {
         };
     }
 
+    pub fn negate(self: Const) Const {
+        return .{
+            .limbs = self.limbs,
+            .positive = !self.positive,
+        };
+    }
+
     pub fn isOdd(self: Const) bool {
         return self.limbs[0] & 1 != 0;
     }
@@ -1831,6 +1932,13 @@ pub const Managed = struct {
         r.setMetadata(m.positive, m.len);
     }
 
+    pub fn addWrap(r: *Managed, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) Allocator.Error!void {
+        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        var m = r.toMutable();
+        m.addWrap(a, b, signedness, bit_count);
+        r.setMetadata(m.positive, m.len);
+    }
+
     /// r = a - b
     ///
     /// r, a and b may be aliases.
@@ -1843,6 +1951,13 @@ pub const Managed = struct {
         r.setMetadata(m.positive, m.len);
     }
 
+    pub fn subWrap(r: *Managed, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) Allocator.Error!void {
+        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        var m = r.toMutable();
+        m.subWrap(a, b, signedness, bit_count);
+        r.setMetadata(m.positive, m.len);
+    }
+
     /// rma = a * b
     ///
     /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
@@ -2034,7 +2149,7 @@ pub const Managed = struct {
 
     // r = truncate(Int(signedness, bit_count), a)
     pub fn truncate(r: *Managed, a: Const, signedness: std.builtin.Signedness, bit_count: usize) !void {
-        try r.ensureCapacity(calcTwosCompLimbCount(a.limbs.len));
+        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
         var m = r.toMutable();
         m.truncate(a, signedness, bit_count);
         r.setMetadata(m.positive, m.len);
-- 
cgit v1.2.3


From 69be6ba8eea8e0b64153a0d0676f1b8749df8195 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Mon, 27 Sep 2021 04:58:27 +0200
Subject: Comptime wrapping addition/subtraction

---
 lib/std/math/big/int.zig |  4 ++--
 src/value.zig            | 53 +++++++++++++++++++++++++++++++-----------------
 2 files changed, 36 insertions(+), 21 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 59d59a19d3..86d0e22cb8 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -444,7 +444,7 @@ pub const Mutable = struct {
         const b_limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)];
 
         if (a.positive != b.positive) {
-              if (a.positive) {
+            if (a.positive) {
                 // (a) - (-b) => a + b
                 r.addWrap(a, b.abs(), signedness, bit_count);
             } else {
@@ -1107,7 +1107,7 @@ pub const Mutable = struct {
 
             // Zero-extend the result
             if (req_limbs > r.len) {
-                mem.set(Limb, r.limbs[r.len .. req_limbs], 0);
+                mem.set(Limb, r.limbs[r.len..req_limbs], 0);
             }
 
             // Truncate to required number of limbs.
diff --git a/src/value.zig b/src/value.zig
index fa11bb6ddb..2894840166 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -1660,19 +1660,26 @@ pub const Value = extern union {
         if (ty.isAnyFloat()) {
             return floatAdd(lhs, rhs, ty, arena);
         }
-        const result = try intAdd(lhs, rhs, arena);
 
-        const max = try ty.maxInt(arena, target);
-        if (compare(result, .gt, max, ty)) {
-            @panic("TODO comptime wrapping integer addition");
-        }
+        const info = ty.intInfo(target);
 
-        const min = try ty.minInt(arena, target);
-        if (compare(result, .lt, min, ty)) {
-            @panic("TODO comptime wrapping integer addition");
-        }
+        var lhs_space: Value.BigIntSpace = undefined;
+        var rhs_space: Value.BigIntSpace = undefined;
+        const lhs_bigint = lhs.toBigInt(&lhs_space);
+        const rhs_bigint = rhs.toBigInt(&rhs_space);
+        const limbs = try arena.alloc(
+            std.math.big.Limb,
+            std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len) + 1,
+        );
+        var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+        result_bigint.addWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits);
+        const result_limbs = result_bigint.limbs[0..result_bigint.len];
 
-        return result;
+        if (result_bigint.positive) {
+            return Value.Tag.int_big_positive.create(arena, result_limbs);
+        } else {
+            return Value.Tag.int_big_negative.create(arena, result_limbs);
+        }
     }
 
     /// Supports integers only; asserts neither operand is undefined.
@@ -1714,19 +1721,27 @@ pub const Value = extern union {
         if (ty.isAnyFloat()) {
             return floatSub(lhs, rhs, ty, arena);
         }
-        const result = try intSub(lhs, rhs, arena);
 
-        const max = try ty.maxInt(arena, target);
-        if (compare(result, .gt, max, ty)) {
-            @panic("TODO comptime wrapping integer subtraction");
-        }
+        const info = ty.intInfo(target);
 
-        const min = try ty.minInt(arena, target);
-        if (compare(result, .lt, min, ty)) {
-            @panic("TODO comptime wrapping integer subtraction");
+        var lhs_space: Value.BigIntSpace = undefined;
+        var rhs_space: Value.BigIntSpace = undefined;
+        const lhs_bigint = lhs.toBigInt(&lhs_space);
+        const rhs_bigint = rhs.toBigInt(&rhs_space);
+        const limbs = try arena.alloc(
+            std.math.big.Limb,
+            std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len) + 1,
+        );
+        var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+        result_bigint.subWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits);
+        const result_limbs = result_bigint.limbs[0..result_bigint.len];
+
+        if (result_bigint.positive) {
+            return Value.Tag.int_big_positive.create(arena, result_limbs);
+        } else {
+            return Value.Tag.int_big_negative.create(arena, result_limbs);
         }
 
-        return result;
     }
 
     /// Supports integers only; asserts neither operand is undefined.
-- 
cgit v1.2.3


From a36ef84deb1d52e513fbf311c71eaa2a9d48de00 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Tue, 28 Sep 2021 11:22:21 +0200
Subject: big ints: Basic wrapping multiplication

---
 lib/std/math/big/int.zig | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 86d0e22cb8..d93dea215f 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -542,6 +542,36 @@ pub const Mutable = struct {
         rma.positive = (a.positive == b.positive);
     }
 
+    pub fn mulNoAliasWrap(
+        rma: *Mutable,
+        a: Const,
+        b: Const,
+        signedness: std.builtin.Signedness,
+        bit_count: usize,
+    ) void {
+        assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
+        assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing
+
+        const req_limbs = calcTwosCompLimbCount(bit_count);
+
+        // We can ignore the upper bits here, those results will be discarded anyway.
+        const a_limbs = a.limbs[0..math.min(req_limbs, a.limbs.len)];
+        const b_limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)];
+
+        mem.set(Limb, rma.limbs[0..req_limbs], 0);
+
+        if (a_limbs.len >= b_limbs.len) {
+            llmulacc_lo(rma.limbs, a_limbs, b_limbs);
+        } else {
+            llmulacc_lo(rma.limbs, b_limbs, a_limbs);
+        }
+
+        rma.normalize(math.min(req_limbs, a.limbs.len + b.limbs.len));
+        rma.positive = (a.positive == b.positive);
+
+        rma.truncate(rma.toConst(), signedness, bit_count);
+    }
+
     /// rma = a * a
     ///
     /// `rma` may not alias with `a`.
@@ -2156,6 +2186,19 @@ pub const Managed = struct {
     }
 };
 
+/// r = a * b, ignoring overflow
+fn llmulacc_lo(r: []Limb, a: []const Limb, b: []const Limb) void {
+    assert(r.len >= a.len);
+    assert(a.len >= b.len);
+
+    // TODO: Improve performance.
+
+    var i: usize = 0;
+    while (i < b.len) : (i += 1) {
+        llmulDigit(r[i..], a, b[i]);
+    }
+}
+
 /// Knuth 4.3.1, Algorithm M.
 ///
 /// r MUST NOT alias any of a or b.
-- 
cgit v1.2.3


From dc1f69854504c4dd7fbd18564c4a6811e3cc87e3 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Wed, 29 Sep 2021 00:11:42 +0200
Subject: big ints: unify add/sub with their wrapping variants

---
 lib/std/math/big/int.zig | 241 ++++++++++++++++++++++++-----------------------
 1 file changed, 124 insertions(+), 117 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index d93dea215f..4494da3f55 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -300,49 +300,55 @@ pub const Mutable = struct {
         return add(r, a, operand);
     }
 
-    /// r = a + b
-    ///
+    /// Base implementation for addition. Adds `max(a.limbs.len, b.limbs.len)` elements from a and b,
+    /// and returns whether any overflow occured.
     /// r, a and b may be aliases.
     ///
-    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
-    /// r is `math.max(a.limbs.len, b.limbs.len) + 1`.
-    pub fn add(r: *Mutable, a: Const, b: Const) void {
+    /// Asserts r has enough elements to hold the result. The upper bound is `max(a.limbs.len, b.limbs.len)`.
+    fn addCarry(r: *Mutable, a: Const, b: Const) bool {
         if (a.eqZero()) {
             r.copy(b);
-            return;
+            return false;
         } else if (b.eqZero()) {
             r.copy(a);
-            return;
-        }
-
-        if (a.limbs.len == 1 and b.limbs.len == 1 and a.positive == b.positive) {
-            var o: Limb = undefined;
-            if (!@addWithOverflow(Limb, a.limbs[0], b.limbs[0], &o)) {
-                r.limbs[0] = o;
-                r.len = 1;
-                r.positive = a.positive;
-                return;
-            }
-        }
-
-        if (a.positive != b.positive) {
+            return false;
+        } else if (a.positive != b.positive) {
             if (a.positive) {
                 // (a) + (-b) => a - b
-                r.sub(a, b.abs());
+                return r.subCarry(a, b.abs());
             } else {
                 // (-a) + (b) => b - a
-                r.sub(b, a.abs());
+                return r.subCarry(b, a.abs());
             }
         } else {
+            r.positive = a.positive;
             if (a.limbs.len >= b.limbs.len) {
-                lladd(r.limbs[0..], a.limbs, b.limbs);
-                r.normalize(a.limbs.len + 1);
+                const c = lladdcarry(r.limbs, a.limbs, b.limbs);
+                r.normalize(a.limbs.len);
+                return c != 0;
             } else {
-                lladd(r.limbs[0..], b.limbs, a.limbs);
-                r.normalize(b.limbs.len + 1);
+                const c = lladdcarry(r.limbs, b.limbs, a.limbs);
+                r.normalize(b.limbs.len);
+                return c != 0;
             }
+        }
+    }
 
-            r.positive = a.positive;
+    /// r = a + b
+    ///
+    /// r, a and b may be aliases.
+    ///
+    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
+    /// r is `math.max(a.limbs.len, b.limbs.len) + 1`.
+    pub fn add(r: *Mutable, a: Const, b: Const) void {
+        if (r.addCarry(a, b)) {
+            // Fix up the result. Note that addCarry normalizes by a.limbs.len or b.limbs.len,
+            // so we need to set the length here.
+            const msl = math.max(a.limbs.len, b.limbs.len);
+            // `[add|sub]Carry` normalizes by `msl`, so we need to fix up the result manually here.
+            // Note, the fact that it normalized means that the intermediary limbs are zero here.
+            r.len = msl + 1;
+            r.limbs[msl] = 1; // If this panics, there wasn't enough space in `r`.
         }
     }
 
@@ -354,37 +360,82 @@ pub const Mutable = struct {
     pub fn addWrap(r: *Mutable, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
         const req_limbs = calcTwosCompLimbCount(bit_count);
 
-        // We can ignore the upper bits here, those results will be discarded anyway.
-        const a_limbs = a.limbs[0..math.min(req_limbs, a.limbs.len)];
-        const b_limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)];
+        // Slice of the upper bits if they exist, these will be ignored and allows us to use addCarry to determine
+        // if an overflow occured.
+        const x = Const{
+            .positive = a.positive,
+            .limbs = a.limbs[0..math.min(req_limbs, a.limbs.len)],
+        };
+
+        const y = Const{
+            .positive = b.positive,
+            .limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)],
+        };
 
+        if (r.addCarry(x, y)) {
+            // There are two possibilities here:
+            // - We overflowed req_limbs. In this case, the carry is ignored.
+            // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
+            const msl = math.max(a.limbs.len, b.limbs.len);
+            if (msl < req_limbs) {
+                r.limbs[msl] = 1;
+                r.len = req_limbs;
+            }
+        }
+
+        r.truncate(r.toConst(), signedness, bit_count);
+    }
+
+    /// Base implementation for subtraction. Subtracts `max(a.limbs.len, b.limbs.len)` elements from a and b,
+    /// and returns whether any overflow occured.
+    /// r, a and b may be aliases.
+    ///
+    /// Asserts r has enough elements to hold the result. The upper bound is `max(a.limbs.len, b.limbs.len)`.
+    fn subCarry(r: *Mutable, a: Const, b: Const) bool {
         if (a.eqZero()) {
             r.copy(b);
+            r.positive = !b.positive;
+            return false;
         } else if (b.eqZero()) {
             r.copy(a);
-        } else if (a.positive != b.positive) {
+            return false;
+        } if (a.positive != b.positive) {
             if (a.positive) {
-                // (a) + (-b) => a - b
-                r.subWrap(a, b.abs(), signedness, bit_count);
+                // (a) - (-b) => a + b
+                return r.addCarry(a, b.abs());
             } else {
-                // (-a) + (b) => b - a
-                r.subWrap(b, a.abs(), signedness, bit_count);
+                // (-a) - (b) => -a + -b
+                return r.addCarry(a, b.negate());
+            }
+        } else if (a.positive) {
+            if (a.order(b) != .lt) {
+                // (a) - (b) => a - b
+                const c = llsubcarry(r.limbs, a.limbs, b.limbs);
+                r.normalize(a.limbs.len);
+                r.positive = true;
+                return c != 0;
+            } else {
+                // (a) - (b) => -b + a => -(b - a)
+                const c = llsubcarry(r.limbs, b.limbs, a.limbs);
+                r.normalize(b.limbs.len);
+                r.positive = false;
+                return c != 0;
             }
-            // Don't need to truncate, subWrap does that for us.
-            return;
         } else {
-            if (a_limbs.len >= b_limbs.len) {
-                _ = lladdcarry(r.limbs, a_limbs, b_limbs);
-                r.normalize(a_limbs.len);
+            if (a.order(b) == .lt) {
+                // (-a) - (-b) => -(a - b)
+                const c = llsubcarry(r.limbs, a.limbs, b.limbs);
+                r.normalize(a.limbs.len);
+                r.positive = false;
+                return c != 0;
             } else {
-                _ = lladdcarry(r.limbs, b_limbs, b_limbs);
-                r.normalize(b_limbs.len);
+                // (-a) - (-b) => --b + -a => b - a
+                const c = llsubcarry(r.limbs, b.limbs, a.limbs);
+                r.normalize(b.limbs.len);
+                r.positive = true;
+                return c != 0;
             }
-
-            r.positive = a.positive;
         }
-
-        r.truncate(r.toConst(), signedness, bit_count);
     }
 
     /// r = a - b
@@ -394,39 +445,14 @@ pub const Mutable = struct {
     /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
     /// r is `math.max(a.limbs.len, b.limbs.len) + 1`. The +1 is not needed if both operands are positive.
     pub fn sub(r: *Mutable, a: Const, b: Const) void {
-        if (a.positive != b.positive) {
-            if (a.positive) {
-                // (a) - (-b) => a + b
-                r.add(a, b.abs());
-            } else {
-                // (-a) - (b) => -(a + b)
-                r.add(a.abs(), b);
-                r.positive = false;
-            }
-        } else {
-            if (a.positive) {
-                // (a) - (b) => a - b
-                if (a.order(b) != .lt) {
-                    llsub(r.limbs[0..], a.limbs[0..a.limbs.len], b.limbs[0..b.limbs.len]);
-                    r.normalize(a.limbs.len);
-                    r.positive = true;
-                } else {
-                    llsub(r.limbs[0..], b.limbs[0..b.limbs.len], a.limbs[0..a.limbs.len]);
-                    r.normalize(b.limbs.len);
-                    r.positive = false;
-                }
-            } else {
-                // (-a) - (-b) => -(a - b)
-                if (a.order(b) == .lt) {
-                    llsub(r.limbs[0..], a.limbs[0..a.limbs.len], b.limbs[0..b.limbs.len]);
-                    r.normalize(a.limbs.len);
-                    r.positive = false;
-                } else {
-                    llsub(r.limbs[0..], b.limbs[0..b.limbs.len], a.limbs[0..a.limbs.len]);
-                    r.normalize(b.limbs.len);
-                    r.positive = true;
-                }
-            }
+        if (r.subCarry(a, b)) {
+            // Fix up the result. Note that addCarry normalizes by a.limbs.len or b.limbs.len,
+            // so we need to set the length here.
+            const msl = math.max(a.limbs.len, b.limbs.len);
+            // `addCarry` normalizes by `msl`, so we need to fix up the result manually here.
+            // Note, the fact that it normalized means that the intermediary limbs are zero here.
+            r.len = msl + 1;
+            r.limbs[msl] = 1; // If this panics, there wasn't enough space in `r`.
         }
     }
 
@@ -438,45 +464,26 @@ pub const Mutable = struct {
     pub fn subWrap(r: *Mutable, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
         const req_limbs = calcTwosCompLimbCount(bit_count);
 
-        // We can ignore the upper bits here, those results will be discarded anyway.
-        // We also don't need to mind order here. Again, overflow is ignored here.
-        const a_limbs = a.limbs[0..math.min(req_limbs, a.limbs.len)];
-        const b_limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)];
+        // Slice of the upper bits if they exist, these will be ignored and allows us to use addCarry to determine
+        // if an overflow occured.
+        const x = Const{
+            .positive = a.positive,
+            .limbs = a.limbs[0..math.min(req_limbs, a.limbs.len)],
+        };
 
-        if (a.positive != b.positive) {
-            if (a.positive) {
-                // (a) - (-b) => a + b
-                r.addWrap(a, b.abs(), signedness, bit_count);
-            } else {
-                // (-a) - (b) => -a + -b
-                // Note, we don't do -(a + b) here to avoid a second truncate.
-                r.addWrap(a, b.negate(), signedness, bit_count);
-            }
-            // Don't need to truncate, addWrap does that for us.
-            return;
-        } else if (a.positive) {
-            if (a_limbs.len >= b_limbs.len) {
-                // (a) - (b) => a - b
-                _ = llsubcarry(r.limbs, a_limbs, b_limbs);
-                r.normalize(a_limbs.len);
-                r.positive = true;
-            } else {
-                // (a) - (b) => -b + a => -(b - a)
-                _ = llsubcarry(r.limbs, b_limbs, a_limbs);
-                r.normalize(b_limbs.len);
-                r.positive = false;
-            }
-        } else {
-            if (a_limbs.len >= b_limbs.len) {
-                // (-a) - (-b) => -(a - b)
-                _ = llsubcarry(r.limbs, a_limbs, b_limbs);
-                r.normalize(a_limbs.len);
-                r.positive = false;
-            } else {
-                // (-a) - (-b) => --b + -a => b - a
-                _ = llsubcarry(r.limbs, b_limbs, a_limbs);
-                r.normalize(b_limbs.len);
-                r.positive = true;
+        const y = Const{
+            .positive = b.positive,
+            .limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)],
+        };
+
+        if (r.subCarry(x, y)) {
+            // There are two possibilities here:
+            // - We overflowed req_limbs. In this case, the carry is ignored.
+            // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
+            const msl = math.max(a.limbs.len, b.limbs.len);
+            if (msl < req_limbs) {
+                r.limbs[msl] = 1;
+                r.len = req_limbs;
             }
         }
 
-- 
cgit v1.2.3


From f1b3a90ef6b29f1b43b43350a05ffe75ddf7ca2c Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Fri, 1 Oct 2021 14:05:55 +0200
Subject: big ints: setTwosCompIntLimit

This function can be used to initialize a big integer to either the upper
or lower limit of a 2s-complement integer. Note that the result is still
in sign-magnitude representation, though in order to convert it into twos
complement all one has to do is take the absolute value.
---
 lib/std/math/big/int.zig      | 110 +++++++++++++++++++++++++++++++++++++++---
 lib/std/math/big/int_test.zig |  30 ++++++++++++
 2 files changed, 132 insertions(+), 8 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 4494da3f55..41fe63f3da 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -86,6 +86,15 @@ pub fn addMulLimbWithCarry(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb {
     return r1;
 }
 
+/// Used to indicate either limit of a 2s-complement integer.
+pub const TwosCompIntLimit = enum {
+    // The low limit, either 0x00 (unsigned) or (-)0x80 (signed) for an 8-bit integer.
+    min,
+
+    // The high limit, either 0xFF (unsigned) or 0x7F (signed) for an 8-bit integer.
+    max,
+};
+
 /// A arbitrary-precision big integer, with a fixed set of mutable limbs.
 pub const Mutable = struct {
     /// Raw digits. These are:
@@ -287,6 +296,75 @@ pub const Mutable = struct {
         self.positive = positive;
     }
 
+    /// Set self to either bound of a 2s-complement integer.
+    /// Note: The result is still sign-magnitude, not twos complement! In order to convert the
+    /// result to twos complement, it is sufficient to take the absolute value.
+    ///
+    /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
+    /// r is `calcTwosCompLimbCount(bit_count)`.
+    pub fn setTwosCompIntLimit(
+        r: *Mutable,
+        limit: TwosCompIntLimit,
+        signedness: std.builtin.Signedness,
+        bit_count: usize
+    ) void {
+        // Handle zero-bit types.
+        if (bit_count == 0) {
+            r.set(0);
+            return;
+        }
+
+        const req_limbs = calcTwosCompLimbCount(bit_count);
+        const bit = @truncate(Log2Limb, bit_count - 1);
+        const signmask = @as(Limb, 1) << bit; // 0b0..010..0 where 1 is the sign bit.
+        const mask = (signmask << 1) -% 1;    // 0b0..011..1 where the leftmost 1 is the sign bit.
+
+        r.positive = true;
+
+        switch (signedness) {
+            .signed => switch (limit) {
+                .min => {
+                    // Negative bound, signed = -0x80.
+                    r.len = req_limbs;
+                    mem.set(Limb, r.limbs[0..r.len - 1], 0);
+                    r.limbs[r.len - 1] = signmask;
+                    r.positive = false;
+                },
+                .max => {
+                    // Positive bound, signed = 0x7F
+                    // Note, in this branch we need to normalize because the first bit is
+                    // supposed to be 0.
+
+                    // Special case for 1-bit integers.
+                    if (bit_count == 1) {
+                        r.set(0);
+                    } else {
+                        const new_req_limbs = calcTwosCompLimbCount(bit_count - 1);
+                        const msb = @truncate(Log2Limb, bit_count - 2);
+                        const new_signmask = @as(Limb, 1) << msb;  // 0b0..010..0 where 1 is the sign bit.
+                        const new_mask = (new_signmask << 1) -% 1; // 0b0..001..1 where the rightmost 0 is the sign bit.
+
+                        r.len = new_req_limbs;
+                        std.mem.set(Limb, r.limbs[0..r.len - 1], maxInt(Limb));
+                        r.limbs[r.len - 1] = new_mask;
+                    }
+                },
+            },
+            .unsigned => switch (limit) {
+                .min => {
+                    // Min bound, unsigned = 0x00
+                    r.set(0);
+                },
+                .max => {
+                    // Max bound, unsigned = 0xFF
+                    r.len = req_limbs;
+                    std.mem.set(Limb, r.limbs[0..r.len - 1], maxInt(Limb));
+                    r.limbs[r.len - 1] = mask;
+                },
+            },
+        }
+    }
+
     /// r = a + scalar
     ///
     /// r and a may be aliases.
@@ -335,7 +413,6 @@ pub const Mutable = struct {
     }
 
     /// r = a + b
-    ///
     /// r, a and b may be aliases.
     ///
     /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
@@ -353,8 +430,8 @@ pub const Mutable = struct {
     }
 
     /// r = a + b with 2s-complement wrapping semantics.
-    ///
     /// r, a and b may be aliases
+    ///
     /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
     /// r is `calcTwosCompLimbCount(bit_count)`.
     pub fn addWrap(r: *Mutable, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
@@ -374,7 +451,8 @@ pub const Mutable = struct {
 
         if (r.addCarry(x, y)) {
             // There are two possibilities here:
-            // - We overflowed req_limbs. In this case, the carry is ignored.
+            // - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by
+            //   truncate anyway.
             // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
             const msl = math.max(a.limbs.len, b.limbs.len);
             if (msl < req_limbs) {
@@ -399,7 +477,7 @@ pub const Mutable = struct {
         } else if (b.eqZero()) {
             r.copy(a);
             return false;
-        } if (a.positive != b.positive) {
+        } else if (a.positive != b.positive) {
             if (a.positive) {
                 // (a) - (-b) => a + b
                 return r.addCarry(a, b.abs());
@@ -478,7 +556,8 @@ pub const Mutable = struct {
 
         if (r.subCarry(x, y)) {
             // There are two possibilities here:
-            // - We overflowed req_limbs. In this case, the carry is ignored.
+            // - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by
+            //   truncate anyway.
             // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
             const msl = math.max(a.limbs.len, b.limbs.len);
             if (msl < req_limbs) {
@@ -1131,8 +1210,8 @@ pub const Mutable = struct {
         }
 
         const bit = @truncate(Log2Limb, bit_count - 1);
-        const signmask = @as(Limb, 1) << bit;
-        const mask = (signmask << 1) -% 1;
+        const signmask = @as(Limb, 1) << bit; // 0b0..010...0 where 1 is the sign bit.
+        const mask = (signmask << 1) -% 1; // 0b0..01..1 where the leftmost 1 is the sign bit.
 
         if (!a.positive) {
             // Convert the integer from sign-magnitude into twos-complement.
@@ -1871,6 +1950,21 @@ pub const Managed = struct {
         self.setMetadata(m.positive, m.len);
     }
 
+    /// Set self to either bound of a 2s-complement integer.
+    /// Note: The result is still sign-magnitude, not twos complement! In order to convert the
+    /// result to twos complement, it is sufficient to take the absolute value.
+    pub fn setTwosCompIntLimit(
+        r: *Managed,
+        limit: TwosCompIntLimit,
+        signedness: std.builtin.Signedness,
+        bit_count: usize
+    ) !void {
+        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        var m = r.toMutable();
+        m.setTwosCompIntLimit(limit, signedness, bit_count);
+        r.setMetadata(m.positive, m.len);
+    }
+
     /// Converts self to a string in the requested base. Memory is allocated from the provided
     /// allocator and not the one present in self.
     pub fn toString(self: Managed, allocator: *Allocator, base: u8, case: std.fmt.Case) ![]u8 {
@@ -2184,7 +2278,7 @@ pub const Managed = struct {
         }
     }
 
-    // r = truncate(Int(signedness, bit_count), a)
+    /// r = truncate(Int(signedness, bit_count), a)
     pub fn truncate(r: *Managed, a: Const, signedness: std.builtin.Signedness, bit_count: usize) !void {
         try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
         var m = r.toMutable();
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 7b5e14b808..bdf75cc6fe 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -269,6 +269,36 @@ test "big.int string set bad base error" {
     try testing.expectError(error.InvalidBase, a.setString(45, "10"));
 }
 
+test "big.int twos complement limit set" {
+    const test_types = [_]type{
+        u64,
+        i64,
+        u1,
+        i1,
+        u0,
+        i0,
+        u65,
+        i65,
+    };
+
+    inline for (test_types) |T| {
+        // To work around 'control flow attempts to use compile-time variable at runtime'
+        const U = T;
+        const int_info = @typeInfo(U).Int;
+
+        var a = try Managed.init(testing.allocator);
+        defer a.deinit();
+
+        try a.setTwosCompIntLimit(.max, int_info.signedness, int_info.bits);
+        var max: U = maxInt(U);
+        try testing.expect(max == try a.to(U));
+
+        try a.setTwosCompIntLimit(.min, int_info.signedness, int_info.bits);
+        var min: U = minInt(U);
+        try testing.expect(min == try a.to(U));
+    }
+}
+
 test "big.int string to" {
     var a = try Managed.initSet(testing.allocator, 120317241209124781241290847124);
     defer a.deinit();
-- 
cgit v1.2.3


From 16991f920b4af8432ff97f83f02c3a4e614f480f Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Fri, 1 Oct 2021 14:27:52 +0200
Subject: big ints: saturating addition

---
 lib/std/math/big/int.zig | 51 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 41fe63f3da..8d4d4793f0 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -454,6 +454,7 @@ pub const Mutable = struct {
             // - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by
             //   truncate anyway.
             // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
+            //   Note: after this we still might need to wrap.
             const msl = math.max(a.limbs.len, b.limbs.len);
             if (msl < req_limbs) {
                 r.limbs[msl] = 1;
@@ -464,6 +465,48 @@ pub const Mutable = struct {
         r.truncate(r.toConst(), signedness, bit_count);
     }
 
+    /// r = a + b with 2s-complement saturating semantics.
+    /// r, a and b may be aliases.
+    ///
+    /// Assets the result fits in `r`. Upper bound on the number of limbs needed by
+    /// r is `calcTwosCompLimbCount(bit_count)`.
+    pub fn addSat(r: *Mutable, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
+        const req_limbs = calcTwosCompLimbCount(bit_count);
+
+        // Slice of the upper bits if they exist, these will be ignored and allows us to use addCarry to determine
+        // if an overflow occured.
+        const x = Const{
+            .positive = a.positive,
+            .limbs = a.limbs[0..math.min(req_limbs, a.limbs.len)],
+        };
+
+        const y = Const{
+            .positive = b.positive,
+            .limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)],
+        };
+
+        if (r.addCarry(x, y)) {
+            // There are two possibilities here:
+            // - We overflowed req_limbs, in which case we need to saturate.
+            // - a and b had less elements than req_limbs, and those were overflowed.
+            //   Note: In this case, might _also_ need to saturate.
+            const msl = math.max(a.limbs.len, b.limbs.len);
+            if (msl < req_limbs) {
+                r.limbs[msl] = 1;
+                r.len = req_limbs;
+                // Note: Saturation may still be required if msl == req_limbs - 1
+            } else {
+                // Overflowed req_limbs, definitely saturate.
+                r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
+            }
+        }
+
+        // Saturate if the result didn't fit.
+        if (!r.toConst().fitsInTwosComp(signedness, bit_count)) {
+            r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
+        }
+    }
+
     /// Base implementation for subtraction. Subtracts `max(a.limbs.len, b.limbs.len)` elements from a and b,
     /// and returns whether any overflow occured.
     /// r, a and b may be aliases.
@@ -559,6 +602,7 @@ pub const Mutable = struct {
             // - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by
             //   truncate anyway.
             // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
+            //   Note: after this we still might need to wrap.
             const msl = math.max(a.limbs.len, b.limbs.len);
             if (msl < req_limbs) {
                 r.limbs[msl] = 1;
@@ -2070,6 +2114,13 @@ pub const Managed = struct {
         r.setMetadata(m.positive, m.len);
     }
 
+    pub fn addSat(r: *Managed, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) Allocator.Error!void {
+        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        var m = r.toMutable();
+        m.addSat(a, b, signedness, bit_count);
+        r.setMetadata(m.positive, m.len);
+    }
+
     /// r = a - b
     ///
     /// r, a and b may be aliases.
-- 
cgit v1.2.3


From bb53f4f15a76fd306c8ec2f51369d7ffcdf4c0ca Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Fri, 1 Oct 2021 14:28:09 +0200
Subject: big ints: implement normal/wrapping/saturating subtraction in terms
 of addition

---
 lib/std/math/big/int.zig | 53 ++++++++++++++++--------------------------------
 1 file changed, 17 insertions(+), 36 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 8d4d4793f0..b55b1eb9b5 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -566,15 +566,7 @@ pub const Mutable = struct {
     /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
     /// r is `math.max(a.limbs.len, b.limbs.len) + 1`. The +1 is not needed if both operands are positive.
     pub fn sub(r: *Mutable, a: Const, b: Const) void {
-        if (r.subCarry(a, b)) {
-            // Fix up the result. Note that addCarry normalizes by a.limbs.len or b.limbs.len,
-            // so we need to set the length here.
-            const msl = math.max(a.limbs.len, b.limbs.len);
-            // `addCarry` normalizes by `msl`, so we need to fix up the result manually here.
-            // Note, the fact that it normalized means that the intermediary limbs are zero here.
-            r.len = msl + 1;
-            r.limbs[msl] = 1; // If this panics, there wasn't enough space in `r`.
-        }
+        r.add(a, b.negate());
     }
 
     /// r = a - b with 2s-complement wrapping semantics.
@@ -583,34 +575,16 @@ pub const Mutable = struct {
     /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
     /// r is `calcTwosCompLimbCount(bit_count)`.
     pub fn subWrap(r: *Mutable, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
-        const req_limbs = calcTwosCompLimbCount(bit_count);
-
-        // Slice of the upper bits if they exist, these will be ignored and allows us to use addCarry to determine
-        // if an overflow occured.
-        const x = Const{
-            .positive = a.positive,
-            .limbs = a.limbs[0..math.min(req_limbs, a.limbs.len)],
-        };
-
-        const y = Const{
-            .positive = b.positive,
-            .limbs = b.limbs[0..math.min(req_limbs, b.limbs.len)],
-        };
-
-        if (r.subCarry(x, y)) {
-            // There are two possibilities here:
-            // - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by
-            //   truncate anyway.
-            // - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
-            //   Note: after this we still might need to wrap.
-            const msl = math.max(a.limbs.len, b.limbs.len);
-            if (msl < req_limbs) {
-                r.limbs[msl] = 1;
-                r.len = req_limbs;
-            }
-        }
+        r.addWrap(a, b.negate(), signedness, bit_count);
+    }
 
-        r.truncate(r.toConst(), signedness, bit_count);
+    /// r = a - b with 2s-complement saturating semantics.
+    /// r, a and b may be aliases.
+    ///
+    /// Assets the result fits in `r`. Upper bound on the number of limbs needed by
+    /// r is `calcTwosCompLimbCount(bit_count)`.
+    pub fn subSat(r: *Mutable, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
+        r.addSat(a, b.negate(), signedness, bit_count);
     }
 
     /// rma = a * b
@@ -2140,6 +2114,13 @@ pub const Managed = struct {
         r.setMetadata(m.positive, m.len);
     }
 
+    pub fn subSat(r: *Managed, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) Allocator.Error!void {
+        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        var m = r.toMutable();
+        m.subSat(a, b, signedness, bit_count);
+        r.setMetadata(m.positive, m.len);
+    }
+
     /// rma = a * b
     ///
     /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
-- 
cgit v1.2.3


From 52721d3a7eb2695c63e22978c0247027b1790f9e Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Fri, 1 Oct 2021 15:22:51 +0200
Subject: big ints: [add|sub]Wrap tests

---
 lib/std/math/big.zig          |  2 +
 lib/std/math/big/int_test.zig | 97 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)

(limited to 'lib')

diff --git a/lib/std/math/big.zig b/lib/std/math/big.zig
index 8c0f7f5e1e..e7f8a7fb34 100644
--- a/lib/std/math/big.zig
+++ b/lib/std/math/big.zig
@@ -5,6 +5,7 @@ pub const Rational = @import("big/rational.zig").Rational;
 pub const int = @import("big/int.zig");
 pub const Limb = usize;
 const limb_info = @typeInfo(Limb).Int;
+pub const SignedLimb = std.meta.Int(.signed, limb_info.bits);
 pub const DoubleLimb = std.meta.Int(.unsigned, 2 * limb_info.bits);
 pub const SignedDoubleLimb = std.meta.Int(.signed, 2 * limb_info.bits);
 pub const Log2Limb = std.math.Log2Int(Limb);
@@ -19,6 +20,7 @@ test {
     _ = int;
     _ = Rational;
     _ = Limb;
+    _ = SignedLimb;
     _ = DoubleLimb;
     _ = SignedDoubleLimb;
     _ = Log2Limb;
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index bdf75cc6fe..d47346a84c 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -4,6 +4,7 @@ const testing = std.testing;
 const Managed = std.math.big.int.Managed;
 const Mutable = std.math.big.int.Mutable;
 const Limb = std.math.big.Limb;
+const SignedLimb = std.math.big.SignedLimb;
 const DoubleLimb = std.math.big.DoubleLimb;
 const SignedDoubleLimb = std.math.big.SignedDoubleLimb;
 const maxInt = std.math.maxInt;
@@ -575,6 +576,102 @@ test "big.int add scalar" {
     try testing.expect((try b.to(u32)) == 55);
 }
 
+test "big.int addWrap single-single, unsigned" {
+    var a = try Managed.initSet(testing.allocator, maxInt(u17));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, 10);
+    defer b.deinit();
+
+    try a.addWrap(a.toConst(), b.toConst(), .unsigned, 17);
+
+    try testing.expect((try a.to(u17)) == 9);
+}
+
+test "big.int subWrap single-single, unsigned" {
+    var a = try Managed.initSet(testing.allocator, 0);
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, maxInt(u17));
+    defer b.deinit();
+
+    try a.subWrap(a.toConst(), b.toConst(), .unsigned, 17);
+
+    try testing.expect((try a.to(u17)) == 1);
+}
+
+test "big.int addWrap multi-multi, unsigned, limb aligned" {
+    var a = try Managed.initSet(testing.allocator, maxInt(DoubleLimb));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, maxInt(DoubleLimb));
+    defer b.deinit();
+
+    try a.addWrap(a.toConst(), b.toConst(), .unsigned, @bitSizeOf(DoubleLimb));
+
+    try testing.expect((try a.to(DoubleLimb)) == maxInt(DoubleLimb) - 1);
+}
+
+test "big.int subWrap single-multi, unsigned, limb aligned" {
+    var a = try Managed.initSet(testing.allocator, 10);
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, maxInt(DoubleLimb) + 100);
+    defer b.deinit();
+
+    try a.subWrap(a.toConst(), b.toConst(), .unsigned, @bitSizeOf(DoubleLimb));
+
+    try testing.expect((try a.to(DoubleLimb)) == maxInt(DoubleLimb) - 88);
+}
+
+test "big.int addWrap single-single, signed" {
+    var a = try Managed.initSet(testing.allocator, maxInt(i21));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, 1 + 1 + maxInt(u21));
+    defer b.deinit();
+
+    try a.addWrap(a.toConst(), b.toConst(), .signed, @bitSizeOf(i21));
+
+    try testing.expect((try a.to(i21)) == minInt(i21));
+}
+
+test "big.int subWrap single-single, signed" {
+    var a = try Managed.initSet(testing.allocator, minInt(i21));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, 1);
+    defer b.deinit();
+
+    try a.subWrap(a.toConst(), b.toConst(), .signed, @bitSizeOf(i21));
+
+    try testing.expect((try a.to(i21)) == maxInt(i21));
+}
+
+test "big.int addWrap multi-multi, signed, limb aligned" {
+    var a = try Managed.initSet(testing.allocator, maxInt(SignedDoubleLimb));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, maxInt(SignedDoubleLimb));
+    defer b.deinit();
+
+    try a.addWrap(a.toConst(), b.toConst(), .signed, @bitSizeOf(SignedDoubleLimb));
+
+    try testing.expect((try a.to(SignedDoubleLimb)) == -2);
+}
+
+test "big.int subWrap single-multi, signed, limb aligned" {
+    var a = try Managed.initSet(testing.allocator, minInt(SignedDoubleLimb));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, 1);
+    defer b.deinit();
+
+    try a.subWrap(a.toConst(), b.toConst(), .signed, @bitSizeOf(SignedDoubleLimb));
+
+    try testing.expect((try a.to(SignedDoubleLimb)) == maxInt(SignedDoubleLimb));
+}
+
 test "big.int sub single-single" {
     var a = try Managed.initSet(testing.allocator, 50);
     defer a.deinit();
-- 
cgit v1.2.3


From 692827baa79590c20f09ae598b2c27663f72a207 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Fri, 1 Oct 2021 15:32:50 +0200
Subject: big ints: [add|sub]Sat tests

---
 lib/std/math/big/int_test.zig | 96 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

(limited to 'lib')

diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index d47346a84c..537a472223 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -672,6 +672,102 @@ test "big.int subWrap single-multi, signed, limb aligned" {
     try testing.expect((try a.to(SignedDoubleLimb)) == maxInt(SignedDoubleLimb));
 }
 
+test "big.int addSat single-single, unsigned" {
+    var a = try Managed.initSet(testing.allocator, maxInt(u17) - 5);
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, 10);
+    defer b.deinit();
+
+    try a.addSat(a.toConst(), b.toConst(), .unsigned, 17);
+
+    try testing.expect((try a.to(u17)) == maxInt(u17));
+}
+
+test "big.int subSat single-single, unsigned" {
+    var a = try Managed.initSet(testing.allocator, 123);
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, 4000);
+    defer b.deinit();
+
+    try a.subSat(a.toConst(), b.toConst(), .unsigned, 17);
+
+    try testing.expect((try a.to(u17)) == 0);
+}
+
+test "big.int addSat multi-multi, unsigned, limb aligned" {
+    var a = try Managed.initSet(testing.allocator, maxInt(DoubleLimb));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, maxInt(DoubleLimb));
+    defer b.deinit();
+
+    try a.addSat(a.toConst(), b.toConst(), .unsigned, @bitSizeOf(DoubleLimb));
+
+    try testing.expect((try a.to(DoubleLimb)) == maxInt(DoubleLimb));
+}
+
+test "big.int subSat single-multi, unsigned, limb aligned" {
+    var a = try Managed.initSet(testing.allocator, 10);
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, maxInt(DoubleLimb) + 100);
+    defer b.deinit();
+
+    try a.subSat(a.toConst(), b.toConst(), .unsigned, @bitSizeOf(DoubleLimb));
+
+    try testing.expect((try a.to(DoubleLimb)) == 0);
+}
+
+test "big.int addSat single-single, signed" {
+    var a = try Managed.initSet(testing.allocator, maxInt(i14));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, 1);
+    defer b.deinit();
+
+    try a.addSat(a.toConst(), b.toConst(), .signed, @bitSizeOf(i14));
+
+    try testing.expect((try a.to(i14)) == maxInt(i14));
+}
+
+test "big.int subSat single-single, signed" {
+    var a = try Managed.initSet(testing.allocator, minInt(i21));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, 1);
+    defer b.deinit();
+
+    try a.subSat(a.toConst(), b.toConst(), .signed, @bitSizeOf(i21));
+
+    try testing.expect((try a.to(i21)) == minInt(i21));
+}
+
+test "big.int addSat multi-multi, signed, limb aligned" {
+    var a = try Managed.initSet(testing.allocator, maxInt(SignedDoubleLimb));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, maxInt(SignedDoubleLimb));
+    defer b.deinit();
+
+    try a.addSat(a.toConst(), b.toConst(), .signed, @bitSizeOf(SignedDoubleLimb));
+
+    try testing.expect((try a.to(SignedDoubleLimb)) == maxInt(SignedDoubleLimb));
+}
+
+test "big.int subSat single-multi, signed, limb aligned" {
+    var a = try Managed.initSet(testing.allocator, minInt(SignedDoubleLimb));
+    defer a.deinit();
+
+    var b = try Managed.initSet(testing.allocator, 1);
+    defer b.deinit();
+
+    try a.subSat(a.toConst(), b.toConst(), .signed, @bitSizeOf(SignedDoubleLimb));
+
+    try testing.expect((try a.to(SignedDoubleLimb)) == minInt(SignedDoubleLimb));
+}
+
 test "big.int sub single-single" {
     var a = try Managed.initSet(testing.allocator, 50);
     defer a.deinit();
-- 
cgit v1.2.3


From 15351f206fba9f7b2bba2bfce90cb6359846ce3c Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Sat, 2 Oct 2021 00:39:28 +0200
Subject: big.int: truncate tests

---
 lib/std/math/big/int_test.zig | 66 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

(limited to 'lib')

diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 537a472223..162be309aa 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -1503,6 +1503,72 @@ test "big.int div multi-multi fuzz case #2" {
     try testing.expect(std.mem.eql(u8, rs, "a900000000000000000000000000000000000000000000000000"));
 }
 
+test "big.int truncate single unsigned" {
+    var a = try Managed.initSet(testing.allocator, maxInt(u47));
+    defer a.deinit();
+
+    try a.truncate(a.toConst(), .unsigned, 17);
+
+    try testing.expect((try a.to(u17)) == maxInt(u17));
+}
+
+test "big.int truncate single signed" {
+    var a = try Managed.initSet(testing.allocator, 0x1_0000);
+    defer a.deinit();
+
+    try a.truncate(a.toConst(), .signed, 17);
+
+    try testing.expect((try a.to(i17)) == minInt(i17));
+}
+
+test "big.int truncate multi to single unsigned" {
+    var a = try Managed.initSet(testing.allocator, (maxInt(Limb) + 1) | 0x1234_5678_9ABC_DEF0);
+    defer a.deinit();
+
+    try a.truncate(a.toConst(), .unsigned, 37);
+
+    try testing.expect((try a.to(u37)) == 0x18_9ABC_DEF0);
+}
+
+test "big.int truncate multi to single signed" {
+    var a = try Managed.initSet(testing.allocator, maxInt(Limb) << 10);
+    defer a.deinit();
+
+    try a.truncate(a.toConst(), .signed, @bitSizeOf(i11));
+
+    try testing.expect((try a.to(i11)) == minInt(i11));
+}
+
+test "big.int truncate multi to multi unsigned" {
+    const bits = @typeInfo(SignedDoubleLimb).Int.bits;
+    const Int = std.meta.Int(.unsigned, bits - 1);
+
+    var a = try Managed.initSet(testing.allocator, maxInt(SignedDoubleLimb));
+    defer a.deinit();
+
+    try a.truncate(a.toConst(), .unsigned, bits - 1);
+
+    try testing.expect((try a.to(Int)) == maxInt(Int));
+}
+
+test "big.int truncate multi to multi signed" {
+    var a = try Managed.initSet(testing.allocator, 3 << @bitSizeOf(Limb));
+    defer a.deinit();
+
+    try a.truncate(a.toConst(), .signed, @bitSizeOf(Limb) + 1);
+
+    try testing.expect((try a.to(std.meta.Int(.signed, @bitSizeOf(Limb) + 1))) == -1 << @bitSizeOf(Limb));
+}
+
+test "big.int truncate negative multi to single" {
+    var a = try Managed.initSet(testing.allocator, -@as(SignedDoubleLimb, maxInt(Limb) + 1));
+    defer a.deinit();
+
+    try a.truncate(a.toConst(), .signed, @bitSizeOf(i17));
+
+    try testing.expect((try a.to(i17)) == 0);
+}
+
 test "big.int shift-right single" {
     var a = try Managed.initSet(testing.allocator, 0xffff0000);
     defer a.deinit();
-- 
cgit v1.2.3


From 5907b3e3830f95d111d9d60027af1350b81f4378 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Sun, 3 Oct 2021 01:20:10 +0200
Subject: big ints: Improve karatsuba multiplication

---
 lib/std/math/big/int.zig | 296 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 211 insertions(+), 85 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index b55b1eb9b5..90961db3dc 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -86,6 +86,24 @@ pub fn addMulLimbWithCarry(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb {
     return r1;
 }
 
+/// a - b * c - *carry, sets carry to the overflow bits
+fn subMulLimbWithBorrow(a: Limb, b: Limb, c: Limb, carry: *Limb) Limb {
+    // r1 = a - *carry
+    var r1: Limb = undefined;
+    const c1: Limb = @boolToInt(@subWithOverflow(Limb, a, carry.*, &r1));
+
+    // r2 = b * c
+    const bc = @as(DoubleLimb, std.math.mulWide(Limb, b, c));
+    const r2 = @truncate(Limb, bc);
+    const c2 = @truncate(Limb, bc >> limb_bits);
+
+    // r1 = r1 - r2
+    const c3: Limb = @boolToInt(@subWithOverflow(Limb, r1, r2, &r1));
+    carry.* = c1 + c2 + c3;
+
+    return r1;
+}
+
 /// Used to indicate either limit of a 2s-complement integer.
 pub const TwosCompIntLimit = enum {
     // The low limit, either 0x00 (unsigned) or (-)0x80 (signed) for an 8-bit integer.
@@ -640,7 +658,7 @@ pub const Mutable = struct {
 
         mem.set(Limb, rma.limbs[0 .. a.limbs.len + b.limbs.len + 1], 0);
 
-        llmulacc(allocator, rma.limbs, a.limbs, b.limbs);
+        llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);
 
         rma.normalize(a.limbs.len + b.limbs.len);
         rma.positive = (a.positive == b.positive);
@@ -665,9 +683,9 @@ pub const Mutable = struct {
         mem.set(Limb, rma.limbs[0..req_limbs], 0);
 
         if (a_limbs.len >= b_limbs.len) {
-            llmulacc_lo(rma.limbs, a_limbs, b_limbs);
+            llmulaccLow(rma.limbs, a_limbs, b_limbs);
         } else {
-            llmulacc_lo(rma.limbs, b_limbs, a_limbs);
+            llmulaccLow(rma.limbs, b_limbs, a_limbs);
         }
 
         rma.normalize(math.min(req_limbs, a.limbs.len + b.limbs.len));
@@ -691,7 +709,7 @@ pub const Mutable = struct {
 
         mem.set(Limb, rma.limbs, 0);
 
-        llsquare_basecase(rma.limbs, a.limbs);
+        llsquareBasecase(rma.limbs, a.limbs);
 
         rma.normalize(2 * a.limbs.len + 1);
         rma.positive = true;
@@ -1219,7 +1237,7 @@ pub const Mutable = struct {
     /// Asserts `r` has enough storage to store the result.
     /// The upper bound is `calcTwosCompLimbCount(a.len)`.
     pub fn truncate(r: *Mutable, a: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
-        const req_limbs = (bit_count + @bitSizeOf(Limb) - 1) / @bitSizeOf(Limb);
+        const req_limbs = calcTwosCompLimbCount(bit_count);
 
         // Handle 0-bit integers.
         if (req_limbs == 0 or a.eqZero()) {
@@ -2319,8 +2337,8 @@ pub const Managed = struct {
     }
 };
 
-/// r = a * b, ignoring overflow
-fn llmulacc_lo(r: []Limb, a: []const Limb, b: []const Limb) void {
+/// r = r + a * b, ignoring overflow
+fn llmulaccLow(r: []Limb, a: []const Limb, b: []const Limb) void {
     assert(r.len >= a.len);
     assert(a.len >= b.len);
 
@@ -2328,32 +2346,41 @@ fn llmulacc_lo(r: []Limb, a: []const Limb, b: []const Limb) void {
 
     var i: usize = 0;
     while (i < b.len) : (i += 1) {
-        llmulDigit(r[i..], a, b[i]);
+        llmulLimb(.add, r[i..], a, b[i]);
     }
 }
 
+/// Different operators which can be used in accumulation style functions
+/// (llmulacc, llmulaccKaratsuba, llmulaccLong, llmulLimb). In all these functions,
+/// a computed value is accumulated with an existing result.
+const AccOp = enum {
+    /// The computed value is added to the result.
+    add,
+
+    /// The computed value is subtracted from the result.
+    sub,
+};
+
 /// Knuth 4.3.1, Algorithm M.
 ///
+/// r = r (op) a * b
 /// r MUST NOT alias any of a or b.
-fn llmulacc(opt_allocator: ?*Allocator, r: []Limb, a: []const Limb, b: []const Limb) void {
+fn llmulacc(comptime op: AccOp, opt_allocator: ?*Allocator, r: []Limb, a: []const Limb, b: []const Limb) void {
     @setRuntimeSafety(debug_safety);
+    assert(r.len >= a.len + b.len);
 
-    const a_norm = a[0..llnormalize(a)];
-    const b_norm = b[0..llnormalize(b)];
-    var x = a_norm;
-    var y = b_norm;
-    if (a_norm.len > b_norm.len) {
-        x = b_norm;
-        y = a_norm;
+    // Order greatest first.
+    var x = a;
+    var y = b;
+    if (a.len < b.len) {
+        x = b;
+        y = a;
     }
 
-    assert(r.len >= x.len + y.len + 1);
-
-    // 48 is a pretty abitrary size chosen based on performance of a factorial program.
     k_mul: {
-        if (x.len > 48) {
+        if (y.len > 48) {
             if (opt_allocator) |allocator| {
-                llmulacc_karatsuba(allocator, r, x, y) catch |err| switch (err) {
+                llmulaccKaratsuba(op, allocator, r, x, y) catch |err| switch (err) {
                     error.OutOfMemory => break :k_mul, // handled below
                 };
                 return;
@@ -2361,83 +2388,153 @@ fn llmulacc(opt_allocator: ?*Allocator, r: []Limb, a: []const Limb, b: []const L
         }
     }
 
-    // Basecase multiplication
-    var i: usize = 0;
-    while (i < x.len) : (i += 1) {
-        llmulDigit(r[i..], y, x[i]);
-    }
+    llmulaccLong(op, r, x, y);
 }
 
 /// Knuth 4.3.1, Algorithm M.
 ///
+/// r = r (op) a * b
 /// r MUST NOT alias any of a or b.
-fn llmulacc_karatsuba(allocator: *Allocator, r: []Limb, x: []const Limb, y: []const Limb) error{OutOfMemory}!void {
+fn llmulaccKaratsuba(
+    comptime op: AccOp,
+    allocator: *Allocator,
+    r: []Limb,
+    a: []const Limb,
+    b: []const Limb,
+) error{OutOfMemory}!void {
     @setRuntimeSafety(debug_safety);
+    assert(r.len >= a.len + b.len);
+    assert(a.len >= b.len);
 
-    assert(r.len >= x.len + y.len + 1);
-
-    const split = @divFloor(x.len, 2);
-    var x0 = x[0..split];
-    var x1 = x[split..x.len];
-    var y0 = y[0..split];
-    var y1 = y[split..y.len];
-
-    var tmp = try allocator.alloc(Limb, x1.len + y1.len + 1);
+    // Classical karatsuba algorithm:
+    // a = a1 * B + a0
+    // b = b1 * B + b0
+    // Where a0, b0 < B
+    //
+    // We then have:
+    // ab = a * b
+    //    = (a1 * B + a0) * (b1 * B + b0)
+    //    = a1 * b1 * B * B + a1 * B * b0 + a0 * b1 * B + a0 * b0
+    //    = a1 * b1 * B * B + (a1 * b0 + a0 * b1) * B + a0 * b0
+    //
+    // Note that:
+    // a1 * b0 + a0 * b1
+    //    = (a1 + a0)(b1 + b0) - a1 * b1 - a0 * b0
+    //    = (a0 - a1)(b1 - b0) + a1 * b1 + a0 * b0
+    //
+    // This yields:
+    // ab = p2 * B^2 + (p0 + p1 + p2) * B + p0
+    //
+    // Where:
+    // p0 = a0 * b0
+    // p1 = (a0 - a1)(b1 - b0)
+    // p2 = a1 * b1
+    //
+    // Note, (a0 - a1) and (b1 - b0) produce values -B < x < B, and so we need to mind the sign here.
+    // We also have:
+    // 0 <= p0 <= 2B
+    // -2B <= p1 <= 2B
+    //
+    // Note, when B is a multiple of the limb size, multiplies by B amount to shifts or
+    // slices of a limbs array.
+
+    const split = b.len / 2; // B
+    const a0 = a[0..llnormalize(a[0..split])];
+    const a1 = a[split..][0..llnormalize(a[split..])];
+    const b0 = b[0..llnormalize(b[0..split])];
+    const b1 = b[split..][0..llnormalize(b[split..])];
+
+    // Note that the above slices work because we have a.len > b.len.
+    // We now also have:
+    // a1.len >= a0.len
+    // a1.len >= b1.len >= b0.len
+    // a0.len == b0.len
+
+    // We need some temporary memory to store intermediate results.
+    // Note, we can reduce the amount of temporaries we need by reordering the computation here:
+    // ab = p2 * B^2 + (p0 + p1 + p2) * B + p0
+    //    = p2 * B^2 + (p0 * B + p1 * B + p2 * B) + p0
+    //    = (p2 * B^2 + p2 * B) + (p0 * B + p0) + p1 * B
+    // By allocating a1.len * b1.len we can be sure that all the intermediary results fit.
+    const tmp = try allocator.alloc(Limb, a.len - split + b.len - split);
     defer allocator.free(tmp);
-    mem.set(Limb, tmp, 0);
 
-    llmulacc(allocator, tmp, x1, y1);
+    // Compute p2.
+    mem.set(Limb, tmp, 0);
+    llmulacc(.add, allocator, tmp, a1, b1);
+    const p2 = tmp[0 .. llnormalize(tmp)];
 
-    var length = llnormalize(tmp);
-    _ = llaccum(r[split..], tmp[0..length]);
-    _ = llaccum(r[split * 2 ..], tmp[0..length]);
+    // Add terms p2 * B^2 and p2 * B to the result.
+    _ = llaccum(op, r[split..], p2);
+    _ = llaccum(op, r[split * 2..], p2);
 
-    mem.set(Limb, tmp[0..length], 0);
+    // Compute p0.
+    mem.set(Limb, p2, 0);
+    llmulacc(.add, allocator, tmp, a0, b0);
+    const p0 = tmp[0 .. llnormalize(tmp[0..a0.len + b0.len])];
 
-    llmulacc(allocator, tmp, x0, y0);
+    // Add terms p0 * B and p0 to the result.
+    _ = llaccum(op, r, p0);
+    _ = llaccum(op, r[split..], p0);
 
-    length = llnormalize(tmp);
-    _ = llaccum(r[0..], tmp[0..length]);
-    _ = llaccum(r[split..], tmp[0..length]);
+    // Finally, compute and add p1.
+    const j0_sign = llcmp(a0, a1);
+    const j1_sign = llcmp(b1, b0);
 
-    const x_cmp = llcmp(x1, x0);
-    const y_cmp = llcmp(y1, y0);
-    if (x_cmp * y_cmp == 0) {
+    if (j0_sign * j1_sign == 0) {
+        // p1 is zero, we don't need to do any computation at all.
         return;
     }
-    const x0_len = llnormalize(x0);
-    const x1_len = llnormalize(x1);
-    var j0 = try allocator.alloc(Limb, math.max(x0_len, x1_len));
-    defer allocator.free(j0);
-    if (x_cmp == 1) {
-        llsub(j0, x1[0..x1_len], x0[0..x0_len]);
+
+    mem.set(Limb, tmp, 0);
+
+    // p1 is nonzero, so compute the intermediary terms j0 = a0 - a1 and j1 = b1 - b0.
+    // Note that in this case, we again need some storage for intermediary results
+    // j0 and j1. Since we have tmp.len >= 2B, we can store both
+    // intermediaries in the already allocated array.
+    const j0 = tmp[0..a1.len];
+    const j1 = tmp[a1.len..];
+
+    // Ensure that no subtraction overflows.
+    if (j0_sign == 1) {
+        // a0 > a1.
+        _ = llsubcarry(j0, a0, a1);
     } else {
-        llsub(j0, x0[0..x0_len], x1[0..x1_len]);
+        // a0 < a1.
+        _ = llsubcarry(j0, a1, a0);
     }
 
-    const y0_len = llnormalize(y0);
-    const y1_len = llnormalize(y1);
-    var j1 = try allocator.alloc(Limb, math.max(y0_len, y1_len));
-    defer allocator.free(j1);
-    if (y_cmp == 1) {
-        llsub(j1, y1[0..y1_len], y0[0..y0_len]);
+    if (j1_sign == 1) {
+        // b1 > b0.
+        _ = llsubcarry(j1, b1, b0);
     } else {
-        llsub(j1, y0[0..y0_len], y1[0..y1_len]);
+        // b1 > b0.
+        _ = llsubcarry(j1, b0, b1);
     }
-    if (x_cmp == y_cmp) {
-        mem.set(Limb, tmp[0..length], 0);
-        llmulacc(allocator, tmp, j0, j1);
 
-        length = llnormalize(tmp);
-        llsub(r[split..], r[split..], tmp[0..length]);
+    if (j0_sign * j1_sign == 1) {
+        // If j0 and j1 are both positive, we now have:
+        // p1 = j0 * j1
+        // If j0 and j1 are both negative, we now have:
+        // p1 = -j0 * -j1 = j0 * j1
+        // In this case we can add p1 to the result using llmulacc.
+        llmulacc(op, allocator, r[split..], j0[0..llnormalize(j0)], j1[0..llnormalize(j1)]);
     } else {
-        llmulacc(allocator, r[split..], j0, j1);
+        // In this case either j0 or j1 is negative, an we have:
+        // p1 = -(j0 * j1)
+        // Now we need to subtract instead of accumulate.
+        const inverted_op = if (op == .add) .sub else .add;
+        llmulacc(inverted_op, allocator, r[split..], j0[0..llnormalize(j0)], j1[0..llnormalize(j1)]);
     }
 }
 
-// r = r + a
-fn llaccum(r: []Limb, a: []const Limb) Limb {
+// r = r (op) a
+fn llaccum(comptime op: AccOp, r: []Limb, a: []const Limb) Limb {
     @setRuntimeSafety(debug_safety);
+    if (op == .sub) {
+        return llsubcarry(r, r, a);
+    }
+
     assert(r.len != 0 and a.len != 0);
     assert(r.len >= a.len);
 
@@ -2486,24 +2583,53 @@ pub fn llcmp(a: []const Limb, b: []const Limb) i8 {
     }
 }
 
-fn llmulDigit(acc: []Limb, y: []const Limb, xi: Limb) void {
+// r = r (op) y * xi
+fn llmulaccLong(comptime op: AccOp, r: []Limb, a: []const Limb, b: []const Limb) void {
+    @setRuntimeSafety(debug_safety);
+    assert(r.len >= a.len + b.len);
+    assert(a.len >= b.len);
+
+    var i: usize = 0;
+    while (i < a.len) : (i += 1) {
+        llmulLimb(op, r[i..], b, a[i]);
+    }
+}
+
+// r = r (op) y * xi
+fn llmulLimb(comptime op: AccOp, acc: []Limb, y: []const Limb, xi: Limb) void {
     @setRuntimeSafety(debug_safety);
     if (xi == 0) {
         return;
     }
 
-    var carry: Limb = 0;
     var a_lo = acc[0..y.len];
     var a_hi = acc[y.len..];
 
-    var j: usize = 0;
-    while (j < a_lo.len) : (j += 1) {
-        a_lo[j] = @call(.{ .modifier = .always_inline }, addMulLimbWithCarry, .{ a_lo[j], y[j], xi, &carry });
-    }
+    switch (op) {
+        .add => {
+            var carry: Limb = 0;
+            var j: usize = 0;
+            while (j < a_lo.len) : (j += 1) {
+                a_lo[j] = addMulLimbWithCarry(a_lo[j], y[j], xi, &carry);
+            }
 
-    j = 0;
-    while ((carry != 0) and (j < a_hi.len)) : (j += 1) {
-        carry = @boolToInt(@addWithOverflow(Limb, a_hi[j], carry, &a_hi[j]));
+            j = 0;
+            while ((carry != 0) and (j < a_hi.len)) : (j += 1) {
+                carry = @boolToInt(@addWithOverflow(Limb, a_hi[j], carry, &a_hi[j]));
+            }
+        },
+        .sub => {
+            var borrow: Limb = 0;
+            var j: usize = 0;
+            while (j < a_lo.len) : (j += 1) {
+                a_lo[j] = subMulLimbWithBorrow(a_lo[j], y[j], xi, &borrow);
+            }
+
+            j = 0;
+            while ((borrow != 0) and (j < a_hi.len)) : (j += 1) {
+                borrow = @boolToInt(@subWithOverflow(Limb, a_hi[j], borrow, &a_hi[j]));
+            }
+        },
     }
 }
 
@@ -2964,7 +3090,7 @@ fn llsignedxor(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_
 }
 
 /// r MUST NOT alias x.
-fn llsquare_basecase(r: []Limb, x: []const Limb) void {
+fn llsquareBasecase(r: []Limb, x: []const Limb) void {
     @setRuntimeSafety(debug_safety);
 
     const x_norm = x;
@@ -2987,7 +3113,7 @@ fn llsquare_basecase(r: []Limb, x: []const Limb) void {
 
     for (x_norm) |v, i| {
         // Accumulate all the x[i]*x[j] (with x!=j) products
-        llmulDigit(r[2 * i + 1 ..], x_norm[i + 1 ..], v);
+        llmulLimb(.add, r[2 * i + 1 ..], x_norm[i + 1 ..], v);
     }
 
     // Each product appears twice, multiply by 2
@@ -2995,7 +3121,7 @@ fn llsquare_basecase(r: []Limb, x: []const Limb) void {
 
     for (x_norm) |v, i| {
         // Compute and add the squares
-        llmulDigit(r[2 * i ..], x[i .. i + 1], v);
+        llmulLimb(.add, r[2 * i ..], x[i .. i + 1], v);
     }
 }
 
@@ -3034,12 +3160,12 @@ fn llpow(r: []Limb, a: []const Limb, b: u32, tmp_limbs: []Limb) void {
     while (i < exp_bits) : (i += 1) {
         // Square
         mem.set(Limb, tmp2, 0);
-        llsquare_basecase(tmp2, tmp1[0..llnormalize(tmp1)]);
+        llsquareBasecase(tmp2, tmp1[0..llnormalize(tmp1)]);
         mem.swap([]Limb, &tmp1, &tmp2);
         // Multiply by a
         if (@shlWithOverflow(u32, exp, 1, &exp)) {
             mem.set(Limb, tmp2, 0);
-            llmulacc(null, tmp2, tmp1[0..llnormalize(tmp1)], a);
+            llmulacc(.add, null, tmp2, tmp1[0..llnormalize(tmp1)], a);
             mem.swap([]Limb, &tmp1, &tmp2);
         }
     }
-- 
cgit v1.2.3


From 41e9c1bac1c447fe42a191bf16ee25ddb3bba97a Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Sun, 3 Oct 2021 16:03:43 +0200
Subject: big ints: Allow llmulaccum to wrap

---
 lib/std/math/big/int.zig | 122 +++++++++++++++++++++++++++++++----------------
 1 file changed, 82 insertions(+), 40 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 90961db3dc..bfcef46bce 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -658,7 +658,7 @@ pub const Mutable = struct {
 
         mem.set(Limb, rma.limbs[0 .. a.limbs.len + b.limbs.len + 1], 0);
 
-        llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);
+        _ = llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);
 
         rma.normalize(a.limbs.len + b.limbs.len);
         rma.positive = (a.positive == b.positive);
@@ -2365,9 +2365,12 @@ const AccOp = enum {
 ///
 /// r = r (op) a * b
 /// r MUST NOT alias any of a or b.
+///
+/// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
 fn llmulacc(comptime op: AccOp, opt_allocator: ?*Allocator, r: []Limb, a: []const Limb, b: []const Limb) void {
     @setRuntimeSafety(debug_safety);
-    assert(r.len >= a.len + b.len);
+    assert(r.len >= a.len);
+    assert(r.len >= b.len);
 
     // Order greatest first.
     var x = a;
@@ -2395,6 +2398,8 @@ fn llmulacc(comptime op: AccOp, opt_allocator: ?*Allocator, r: []Limb, a: []cons
 ///
 /// r = r (op) a * b
 /// r MUST NOT alias any of a or b.
+///
+/// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
 fn llmulaccKaratsuba(
     comptime op: AccOp,
     allocator: *Allocator,
@@ -2403,7 +2408,7 @@ fn llmulaccKaratsuba(
     b: []const Limb,
 ) error{OutOfMemory}!void {
     @setRuntimeSafety(debug_safety);
-    assert(r.len >= a.len + b.len);
+    assert(r.len >= a.len);
     assert(a.len >= b.len);
 
     // Classical karatsuba algorithm:
@@ -2437,49 +2442,84 @@ fn llmulaccKaratsuba(
     //
     // Note, when B is a multiple of the limb size, multiplies by B amount to shifts or
     // slices of a limbs array.
+    //
+    // This function computes the result of the multiplication modulo r.len. This means:
+    // - p2 and p1 only need to be computed modulo r.len - B.
+    // - In the case of p2, p2 * B^2 needs to be added modulo r.len - 2 * B.
 
     const split = b.len / 2; // B
+
+    const limbs_after_split = r.len - split; // Limbs to compute for p1 and p2.
+    const limbs_after_split2 = r.len - split * 2; // Limbs to add for p2 * B^2.
+
+    // For a0 and b0 we need the full range.
     const a0 = a[0..llnormalize(a[0..split])];
-    const a1 = a[split..][0..llnormalize(a[split..])];
     const b0 = b[0..llnormalize(b[0..split])];
-    const b1 = b[split..][0..llnormalize(b[split..])];
 
-    // Note that the above slices work because we have a.len > b.len.
-    // We now also have:
-    // a1.len >= a0.len
-    // a1.len >= b1.len >= b0.len
-    // a0.len == b0.len
+    // For a1 and b1 we only need `limbs_after_split` limbs.
+    const a1 = blk: {
+        var a1 = a[split..];
+        a1.len = math.min(llnormalize(a1), limbs_after_split);
+        break :blk a1;
+    };
+
+    const b1 = blk: {
+        var b1 = b[split..];
+        b1.len = math.min(llnormalize(b1), limbs_after_split);
+        break :blk b1;
+    };
+
+    // Note that the above slices relative to `split` work because we have a.len > b.len.
 
     // We need some temporary memory to store intermediate results.
     // Note, we can reduce the amount of temporaries we need by reordering the computation here:
     // ab = p2 * B^2 + (p0 + p1 + p2) * B + p0
     //    = p2 * B^2 + (p0 * B + p1 * B + p2 * B) + p0
     //    = (p2 * B^2 + p2 * B) + (p0 * B + p0) + p1 * B
-    // By allocating a1.len * b1.len we can be sure that all the intermediary results fit.
+
+    // Allocate at least enough memory to be able to multiply the upper two segments of a and b, assuming
+    // no overflow.
     const tmp = try allocator.alloc(Limb, a.len - split + b.len - split);
     defer allocator.free(tmp);
 
     // Compute p2.
-    mem.set(Limb, tmp, 0);
-    llmulacc(.add, allocator, tmp, a1, b1);
-    const p2 = tmp[0 .. llnormalize(tmp)];
+    // Note, we don't need to compute all of p2, just enough limbs to satisfy r.
+    const p2_limbs = math.min(limbs_after_split, a1.len + b1.len);
 
-    // Add terms p2 * B^2 and p2 * B to the result.
-    _ = llaccum(op, r[split..], p2);
-    _ = llaccum(op, r[split * 2..], p2);
+    mem.set(Limb, tmp[0..p2_limbs], 0);
+    llmulacc(.add, allocator, tmp[0..p2_limbs], a1[0..math.min(a1.len, p2_limbs)], b1[0..math.min(b1.len, p2_limbs)]);
+    const p2 = tmp[0 .. llnormalize(tmp[0..p2_limbs])];
+
+    // Add p2 * B to the result.
+    llaccum(op, r[split..], p2);
+
+    // Add p2 * B^2 to the result if required.
+    if (limbs_after_split2 > 0) {
+        llaccum(op, r[split * 2..], p2[0..math.min(p2.len, limbs_after_split2)]);
+    }
 
     // Compute p0.
-    mem.set(Limb, p2, 0);
-    llmulacc(.add, allocator, tmp, a0, b0);
-    const p0 = tmp[0 .. llnormalize(tmp[0..a0.len + b0.len])];
+    // Since a0.len, b0.len <= split and r.len >= split * 2, the full width of p0 needs to be computed.
+    const p0_limbs = a0.len + b0.len;
+    mem.set(Limb, tmp[0..p0_limbs], 0);
+    llmulacc(.add, allocator, tmp[0..p0_limbs], a0, b0);
+    const p0 = tmp[0 .. llnormalize(tmp[0..p0_limbs])];
+
+    // Add p0 to the result.
+    llaccum(op, r, p0);
+
+    // Add p0 * B to the result. In this case, we may not need all of it.
+    llaccum(op, r[split..], p0[0..math.min(limbs_after_split, p0.len)]);
 
-    // Add terms p0 * B and p0 to the result.
-    _ = llaccum(op, r, p0);
-    _ = llaccum(op, r[split..], p0);
 
     // Finally, compute and add p1.
-    const j0_sign = llcmp(a0, a1);
-    const j1_sign = llcmp(b1, b0);
+    // From now on we only need `limbs_after_split` limbs for a0 and b0, since the result of the
+    // following computation will be added * B.
+    const a0x = a0[0..std.math.min(a0.len, limbs_after_split)];
+    const b0x = b0[0..std.math.min(b0.len, limbs_after_split)];
+
+    const j0_sign = llcmp(a0x, a1);
+    const j1_sign = llcmp(b1, b0x);
 
     if (j0_sign * j1_sign == 0) {
         // p1 is zero, we don't need to do any computation at all.
@@ -2492,24 +2532,24 @@ fn llmulaccKaratsuba(
     // Note that in this case, we again need some storage for intermediary results
     // j0 and j1. Since we have tmp.len >= 2B, we can store both
     // intermediaries in the already allocated array.
-    const j0 = tmp[0..a1.len];
-    const j1 = tmp[a1.len..];
+    const j0 = tmp[0..a.len - split];
+    const j1 = tmp[a.len - split..];
 
     // Ensure that no subtraction overflows.
     if (j0_sign == 1) {
         // a0 > a1.
-        _ = llsubcarry(j0, a0, a1);
+        _ = llsubcarry(j0, a0x, a1);
     } else {
         // a0 < a1.
-        _ = llsubcarry(j0, a1, a0);
+        _ = llsubcarry(j0, a1, a0x);
     }
 
     if (j1_sign == 1) {
         // b1 > b0.
-        _ = llsubcarry(j1, b1, b0);
+        _ = llsubcarry(j1, b1, b0x);
     } else {
         // b1 > b0.
-        _ = llsubcarry(j1, b0, b1);
+        _ = llsubcarry(j1, b0x, b1);
     }
 
     if (j0_sign * j1_sign == 1) {
@@ -2528,11 +2568,13 @@ fn llmulaccKaratsuba(
     }
 }
 
-// r = r (op) a
-fn llaccum(comptime op: AccOp, r: []Limb, a: []const Limb) Limb {
+/// r = r (op) a.
+/// The result is computed modulo `r.len`.
+fn llaccum(comptime op: AccOp, r: []Limb, a: []const Limb) void {
     @setRuntimeSafety(debug_safety);
     if (op == .sub) {
-        return llsubcarry(r, r, a);
+        _ = llsubcarry(r, r, a);
+        return;
     }
 
     assert(r.len != 0 and a.len != 0);
@@ -2551,8 +2593,6 @@ fn llaccum(comptime op: AccOp, r: []Limb, a: []const Limb) Limb {
     while ((carry != 0) and i < r.len) : (i += 1) {
         carry = @boolToInt(@addWithOverflow(Limb, r[i], carry, &r[i]));
     }
-
-    return carry;
 }
 
 /// Returns -1, 0, 1 if |a| < |b|, |a| == |b| or |a| > |b| respectively for limbs.
@@ -2583,19 +2623,21 @@ pub fn llcmp(a: []const Limb, b: []const Limb) i8 {
     }
 }
 
-// r = r (op) y * xi
+/// r = r (op) y * xi
+/// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
 fn llmulaccLong(comptime op: AccOp, r: []Limb, a: []const Limb, b: []const Limb) void {
     @setRuntimeSafety(debug_safety);
     assert(r.len >= a.len + b.len);
     assert(a.len >= b.len);
 
     var i: usize = 0;
-    while (i < a.len) : (i += 1) {
-        llmulLimb(op, r[i..], b, a[i]);
+    while (i < b.len) : (i += 1) {
+        llmulLimb(op, r[i..], a, b[i]);
     }
 }
 
-// r = r (op) y * xi
+/// r = r (op) y * xi
+/// The result is computed modulo `r.len`.
 fn llmulLimb(comptime op: AccOp, acc: []Limb, y: []const Limb, xi: Limb) void {
     @setRuntimeSafety(debug_safety);
     if (xi == 0) {
-- 
cgit v1.2.3


From fdf13fb81940ada1673914cab4ff46a1ddae42dd Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Sun, 3 Oct 2021 16:17:46 +0200
Subject: big ints: Wrapping multiplication

---
 lib/std/math/big/int.zig | 114 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 90 insertions(+), 24 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index bfcef46bce..08e5331a67 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -44,6 +44,11 @@ pub fn calcMulLimbsBufferLen(a_len: usize, b_len: usize, aliases: usize) usize {
     return aliases * math.max(a_len, b_len);
 }
 
+pub fn calcMulWrapLimbsBufferLen(bit_count: usize, a_len: usize, b_len: usize, aliases: usize) usize {
+    const req_limbs = calcTwosCompLimbCount(bit_count);
+    return aliases * math.min(req_limbs, math.max(a_len, b_len));
+}
+
 pub fn calcSetStringLimbsBufferLen(base: u8, string_len: usize) usize {
     const limb_count = calcSetStringLimbCount(base, string_len);
     return calcMulLimbsBufferLen(limb_count, limb_count, 2);
@@ -611,7 +616,7 @@ pub const Mutable = struct {
     /// `a` and `b` may alias with each other.
     ///
     /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
-    /// rma is given by `a.limbs.len + b.limbs.len + 1`.
+    /// rma is given by `a.limbs.len + b.limbs.len`.
     ///
     /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulLimbsBufferLen`.
     pub fn mul(rma: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, allocator: ?*Allocator) void {
@@ -640,7 +645,7 @@ pub const Mutable = struct {
     /// `a` and `b` may alias with each other.
     ///
     /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
-    /// rma is given by `a.limbs.len + b.limbs.len + 1`.
+    /// rma is given by `a.limbs.len + b.limbs.len`.
     ///
     /// If `allocator` is provided, it will be used for temporary storage to improve
     /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
@@ -658,18 +663,69 @@ pub const Mutable = struct {
 
         mem.set(Limb, rma.limbs[0 .. a.limbs.len + b.limbs.len + 1], 0);
 
-        _ = llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);
+        llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);
 
         rma.normalize(a.limbs.len + b.limbs.len);
         rma.positive = (a.positive == b.positive);
     }
 
-    pub fn mulNoAliasWrap(
+    /// rma = a * b with 2s-complement wrapping semantics.
+    ///
+    /// `rma` may alias with `a` or `b`.
+    /// `a` and `b` may alias with each other.
+    ///
+    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
+    /// rma is given by `a.limbs.len + b.limbs.len`.
+    ///
+    /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulWrapLimbsBufferLen`.
+    pub fn mulWrap(
+        rma: *Mutable,
+        a: Const,
+        b: Const,
+        signedness: std.builtin.Signedness,
+        bit_count: usize,
+        limbs_buffer: []Limb,
+        allocator: ?*Allocator,
+    ) void {
+        var buf_index: usize = 0;
+        const req_limbs = calcTwosCompLimbCount(bit_count);
+
+        const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: {
+            const start = buf_index;
+            const a_len = math.min(req_limbs, a.limbs.len);
+            mem.copy(Limb, limbs_buffer[buf_index..], a.limbs[0..a_len]);
+            buf_index += a_len;
+            break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
+        } else a;
+
+        const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: {
+            const start = buf_index;
+            const b_len = math.min(req_limbs, b.limbs.len);
+            mem.copy(Limb, limbs_buffer[buf_index..], b.limbs[0..b_len]);
+            buf_index += b_len;
+            break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
+        } else b;
+
+        return rma.mulWrapNoAlias(a_copy, b_copy, signedness, bit_count, allocator);
+    }
+
+    /// rma = a * b with 2s-complement wrapping semantics.
+    ///
+    /// `rma` may not alias with `a` or `b`.
+    /// `a` and `b` may alias with each other.
+    ///
+    /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
+    /// rma is given by `a.limbs.len + b.limbs.len`.
+    ///
+    /// If `allocator` is provided, it will be used for temporary storage to improve
+    /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
+    pub fn mulWrapNoAlias(
         rma: *Mutable,
         a: Const,
         b: Const,
         signedness: std.builtin.Signedness,
         bit_count: usize,
+        allocator: ?*Allocator,
     ) void {
         assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
         assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing
@@ -682,15 +738,9 @@ pub const Mutable = struct {
 
         mem.set(Limb, rma.limbs[0..req_limbs], 0);
 
-        if (a_limbs.len >= b_limbs.len) {
-            llmulaccLow(rma.limbs, a_limbs, b_limbs);
-        } else {
-            llmulaccLow(rma.limbs, b_limbs, a_limbs);
-        }
-
+        llmulacc(.add, allocator, rma.limbs, a_limbs, b_limbs);
         rma.normalize(math.min(req_limbs, a.limbs.len + b.limbs.len));
         rma.positive = (a.positive == b.positive);
-
         rma.truncate(rma.toConst(), signedness, bit_count);
     }
 
@@ -2167,6 +2217,35 @@ pub const Managed = struct {
         rma.setMetadata(m.positive, m.len);
     }
 
+    /// rma = a * b with 2s-complement wrapping semantics.
+    ///
+    /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
+    /// If rma aliases a or b, then caller must call `rma.ensureCapacity(calcTwosCompLimbCount(bit_count))`
+    /// prior to calling `mul`.
+    ///
+    /// Returns an error if memory could not be allocated.
+    ///
+    /// rma's allocator is used for temporary storage to speed up the multiplication.
+    pub fn mulWrap(rma: *Managed, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) !void {
+        var alias_count: usize = 0;
+        if (rma.limbs.ptr == a.limbs.ptr)
+            alias_count += 1;
+        if (rma.limbs.ptr == b.limbs.ptr)
+            alias_count += 1;
+
+        try rma.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        var m = rma.toMutable();
+        if (alias_count == 0) {
+            m.mulWrapNoAlias(a, b, signedness, bit_count, rma.allocator);
+        } else {
+            const limb_count = calcMulWrapLimbsBufferLen(bit_count, a.limbs.len, b.limbs.len, alias_count);
+            const limbs_buffer = try rma.allocator.alloc(Limb, limb_count);
+            defer rma.allocator.free(limbs_buffer);
+            m.mulWrap(a, b, signedness, bit_count, limbs_buffer, rma.allocator);
+        }
+        rma.setMetadata(m.positive, m.len);
+    }
+
     pub fn ensureAddScalarCapacity(r: *Managed, a: Const, scalar: anytype) !void {
         try r.ensureCapacity(math.max(a.limbs.len, calcLimbLen(scalar)) + 1);
     }
@@ -2337,19 +2416,6 @@ pub const Managed = struct {
     }
 };
 
-/// r = r + a * b, ignoring overflow
-fn llmulaccLow(r: []Limb, a: []const Limb, b: []const Limb) void {
-    assert(r.len >= a.len);
-    assert(a.len >= b.len);
-
-    // TODO: Improve performance.
-
-    var i: usize = 0;
-    while (i < b.len) : (i += 1) {
-        llmulLimb(.add, r[i..], a, b[i]);
-    }
-}
-
 /// Different operators which can be used in accumulation style functions
 /// (llmulacc, llmulaccKaratsuba, llmulaccLong, llmulLimb). In all these functions,
 /// a computed value is accumulated with an existing result.
-- 
cgit v1.2.3


From bbd50248f25996e707ebacc2f020e23cee6140e3 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Sun, 3 Oct 2021 16:34:52 +0200
Subject: big ints: saturate() function

---
 lib/std/math/big/int.zig | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 08e5331a67..801f57fb6a 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -525,9 +525,7 @@ pub const Mutable = struct {
         }
 
         // Saturate if the result didn't fit.
-        if (!r.toConst().fitsInTwosComp(signedness, bit_count)) {
-            r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
-        }
+        r.saturate(r.toConst(), signedness, bit_count);
     }
 
     /// Base implementation for subtraction. Subtracts `max(a.limbs.len, b.limbs.len)` elements from a and b,
@@ -661,7 +659,7 @@ pub const Mutable = struct {
             }
         }
 
-        mem.set(Limb, rma.limbs[0 .. a.limbs.len + b.limbs.len + 1], 0);
+        mem.set(Limb, rma.limbs[0 .. a.limbs.len + b.limbs.len], 0);
 
         llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);
 
@@ -1366,6 +1364,17 @@ pub const Mutable = struct {
         }
     }
 
+    /// Saturate an integer to a number of bits, following 2s-complement semantics.
+    /// r may alias a.
+    ///
+    /// Asserts `r` has enough storage to store the result.
+    /// The upper bound is `calcTwosCompLimbCount(a.len)`.
+    pub fn saturate(r: *Mutable, a: Const, signedness: std.builtin.Signedness, bit_count: usize) void {
+        if (!a.fitsInTwosComp(signedness, bit_count)) {
+            r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
+        }
+    }
+
     /// Normalize a possible sequence of leading zeros.
     ///
     /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4]
@@ -2414,6 +2423,14 @@ pub const Managed = struct {
         m.truncate(a, signedness, bit_count);
         r.setMetadata(m.positive, m.len);
     }
+
+    /// r = saturate(Int(signedness, bit_count), a)
+    pub fn saturate(r: *Managed, a: Const, signedness: std.builtin.Signedness, bit_count: usize) !void {
+        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        var m = r.toMutable();
+        m.saturate(a, signedness, bit_count);
+        r.setMetadata(m.positive, m.len);
+    }
 };
 
 /// Different operators which can be used in accumulation style functions
-- 
cgit v1.2.3


From ebcdfebaa642c592bae13323c52dd1ec4f242f84 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Sun, 3 Oct 2021 16:45:13 +0200
Subject: big ints: saturate() tests

---
 lib/std/math/big/int_test.zig | 64 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

(limited to 'lib')

diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 162be309aa..0c544f66fa 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -1569,6 +1569,70 @@ test "big.int truncate negative multi to single" {
     try testing.expect((try a.to(i17)) == 0);
 }
 
+test "big.int saturate single signed positive" {
+    var a = try Managed.initSet(testing.allocator, 0xBBBB_BBBB);
+    defer a.deinit();
+
+    try a.saturate(a.toConst(), .signed, 17);
+
+    try testing.expect((try a.to(i17)) == maxInt(i17));
+}
+
+test "big.int saturate single signed negative" {
+    var a = try Managed.initSet(testing.allocator, -1_234_567);
+    defer a.deinit();
+
+    try a.saturate(a.toConst(), .signed, 17);
+
+    try testing.expect((try a.to(i17)) == minInt(i17));
+}
+
+test "big.int saturate single signed" {
+    var a = try Managed.initSet(testing.allocator, maxInt(i17) - 1);
+    defer a.deinit();
+
+    try a.saturate(a.toConst(), .signed, 17);
+
+    try testing.expect((try a.to(i17)) == maxInt(i17) - 1);
+}
+
+test "big.int saturate multi signed" {
+    var a = try Managed.initSet(testing.allocator, maxInt(Limb) << @bitSizeOf(SignedDoubleLimb));
+    defer a.deinit();
+
+    try a.saturate(a.toConst(), .signed, @bitSizeOf(SignedDoubleLimb));
+
+    try testing.expect((try a.to(SignedDoubleLimb)) == maxInt(SignedDoubleLimb));
+}
+
+test "big.int saturate single unsigned" {
+    var a = try Managed.initSet(testing.allocator, 0xFEFE_FEFE);
+    defer a.deinit();
+
+    try a.saturate(a.toConst(), .unsigned, 23);
+
+    try testing.expect((try a.to(u23)) == maxInt(u23));
+}
+
+test "big.int saturate multi unsigned zero" {
+    var a = try Managed.initSet(testing.allocator, -1);
+    defer a.deinit();
+
+    try a.saturate(a.toConst(), .unsigned, @bitSizeOf(DoubleLimb));
+
+    try testing.expect(a.eqZero());
+}
+
+
+test "big.int saturate multi unsigned" {
+    var a = try Managed.initSet(testing.allocator, maxInt(Limb) << @bitSizeOf(DoubleLimb));
+    defer a.deinit();
+
+    try a.saturate(a.toConst(), .unsigned, @bitSizeOf(DoubleLimb));
+
+    try testing.expect((try a.to(DoubleLimb)) == maxInt(DoubleLimb));
+}
+
 test "big.int shift-right single" {
     var a = try Managed.initSet(testing.allocator, 0xffff0000);
     defer a.deinit();
-- 
cgit v1.2.3


From b352564b36052053260696809ba19a7497e25940 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Sun, 3 Oct 2021 23:39:47 +0200
Subject: big ints: Some extra comments

---
 lib/std/math/big/int.zig | 40 ++++++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 801f57fb6a..bc19b652d8 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -2158,15 +2158,27 @@ pub const Managed = struct {
         r.setMetadata(m.positive, m.len);
     }
 
+    /// r = a + b with 2s-complement wrapping semantics.
+    ///
+    /// r, a and b may be aliases. If r aliases a or b, then caller must call
+    /// `r.ensureTwosCompCapacity` prior to calling `add`.
+    ///
+    /// Returns an error if memory could not be allocated.
     pub fn addWrap(r: *Managed, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) Allocator.Error!void {
-        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        try r.ensureTwosCompCapacity(bit_count);
         var m = r.toMutable();
         m.addWrap(a, b, signedness, bit_count);
         r.setMetadata(m.positive, m.len);
     }
 
+    /// r = a + b with 2s-complement saturating semantics.
+    ///
+    /// r, a and b may be aliases. If r aliases a or b, then caller must call
+    /// `r.ensureTwosCompCapacity` prior to calling `add`.
+    ///
+    /// Returns an error if memory could not be allocated.
     pub fn addSat(r: *Managed, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) Allocator.Error!void {
-        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        try r.ensureTwosCompCapacity(bit_count);
         var m = r.toMutable();
         m.addSat(a, b, signedness, bit_count);
         r.setMetadata(m.positive, m.len);
@@ -2184,15 +2196,27 @@ pub const Managed = struct {
         r.setMetadata(m.positive, m.len);
     }
 
+    /// r = a - b with 2s-complement wrapping semantics.
+    ///
+    /// r, a and b may be aliases. If r aliases a or b, then caller must call
+    /// `r.ensureTwosCompCapacity` prior to calling `add`.
+    ///
+    /// Returns an error if memory could not be allocated.
     pub fn subWrap(r: *Managed, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) Allocator.Error!void {
-        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        try r.ensureTwosCompCapacity(bit_count);
         var m = r.toMutable();
         m.subWrap(a, b, signedness, bit_count);
         r.setMetadata(m.positive, m.len);
     }
 
+    /// r = a - b with 2s-complement saturating semantics.
+    ///
+    /// r, a and b may be aliases. If r aliases a or b, then caller must call
+    /// `r.ensureTwosCompCapacity` prior to calling `add`.
+    ///
+    /// Returns an error if memory could not be allocated.
     pub fn subSat(r: *Managed, a: Const, b: Const, signedness: std.builtin.Signedness, bit_count: usize) Allocator.Error!void {
-        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        try r.ensureTwosCompCapacity(bit_count);
         var m = r.toMutable();
         m.subSat(a, b, signedness, bit_count);
         r.setMetadata(m.positive, m.len);
@@ -2229,7 +2253,7 @@ pub const Managed = struct {
     /// rma = a * b with 2s-complement wrapping semantics.
     ///
     /// rma, a and b may be aliases. However, it is more efficient if rma does not alias a or b.
-    /// If rma aliases a or b, then caller must call `rma.ensureCapacity(calcTwosCompLimbCount(bit_count))`
+    /// If rma aliases a or b, then caller must call `ensureTwosCompCapacity`
     /// prior to calling `mul`.
     ///
     /// Returns an error if memory could not be allocated.
@@ -2242,7 +2266,7 @@ pub const Managed = struct {
         if (rma.limbs.ptr == b.limbs.ptr)
             alias_count += 1;
 
-        try rma.ensureCapacity(calcTwosCompLimbCount(bit_count));
+        try rma.ensureTwosCompCapacity(bit_count);
         var m = rma.toMutable();
         if (alias_count == 0) {
             m.mulWrapNoAlias(a, b, signedness, bit_count, rma.allocator);
@@ -2255,6 +2279,10 @@ pub const Managed = struct {
         rma.setMetadata(m.positive, m.len);
     }
 
+    pub fn ensureTwosCompCapacity(r: *Managed, bit_count: usize) !void {
+        try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
+    }
+
     pub fn ensureAddScalarCapacity(r: *Managed, a: Const, scalar: anytype) !void {
         try r.ensureCapacity(math.max(a.limbs.len, calcLimbLen(scalar)) + 1);
     }
-- 
cgit v1.2.3


From a62ce87f1f8874cf9c6bcf197afc638e8321f692 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Mon, 4 Oct 2021 00:05:10 +0200
Subject: big ints: mulWrap tests

---
 lib/std/math/big/int_test.zig | 79 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 0c544f66fa..7d745aa203 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -971,6 +971,84 @@ test "big.int mul large" {
     try testing.expect(b.eq(c));
 }
 
+test "big.int mulWrap single-single unsigned" {
+    var a = try Managed.initSet(testing.allocator, 1234);
+    defer a.deinit();
+    var b = try Managed.initSet(testing.allocator, 5678);
+    defer b.deinit();
+
+    var c = try Managed.init(testing.allocator);
+    defer c.deinit();
+    try c.mulWrap(a.toConst(), b.toConst(), .unsigned, 17);
+
+    try testing.expect((try c.to(u17)) == 59836);
+}
+
+test "big.int mulWrap single-single signed" {
+    var a = try Managed.initSet(testing.allocator, 1234);
+    defer a.deinit();
+    var b = try Managed.initSet(testing.allocator, -5678);
+    defer b.deinit();
+
+    var c = try Managed.init(testing.allocator);
+    defer c.deinit();
+    try c.mulWrap(a.toConst(), b.toConst(), .signed, 17);
+
+    try testing.expect((try c.to(i17)) == -59836);
+}
+
+test "big.int mulWrap multi-multi unsigned" {
+    const op1 = 0x998888efefefefefefefef;
+    const op2 = 0x333000abababababababab;
+    var a = try Managed.initSet(testing.allocator, op1);
+    defer a.deinit();
+    var b = try Managed.initSet(testing.allocator, op2);
+    defer b.deinit();
+
+    var c = try Managed.init(testing.allocator);
+    defer c.deinit();
+    try c.mulWrap(a.toConst(), b.toConst(), .unsigned, 65);
+
+    try testing.expect((try c.to(u256)) == (op1 * op2) & ((1 << 65) - 1));
+}
+
+test "big.int mulWrap multi-multi signed" {
+    var a = try Managed.initSet(testing.allocator, maxInt(SignedDoubleLimb) - 1);
+    defer a.deinit();
+    var b = try Managed.initSet(testing.allocator, maxInt(SignedDoubleLimb));
+    defer b.deinit();
+
+    var c = try Managed.init(testing.allocator);
+    defer c.deinit();
+    try c.mulWrap(a.toConst(), b.toConst(), .signed, 128);
+
+    try testing.expect((try c.to(SignedDoubleLimb)) == minInt(SignedDoubleLimb) + 2);
+}
+
+test "big.int mulWrap large" {
+    var a = try Managed.initCapacity(testing.allocator, 50);
+    defer a.deinit();
+    var b = try Managed.initCapacity(testing.allocator, 100);
+    defer b.deinit();
+    var c = try Managed.initCapacity(testing.allocator, 100);
+    defer c.deinit();
+
+    // Generate a number that's large enough to cross the thresholds for the use
+    // of subquadratic algorithms
+    for (a.limbs) |*p| {
+        p.* = std.math.maxInt(Limb);
+    }
+    a.setMetadata(true, 50);
+
+    const testbits = @bitSizeOf(Limb) * 64 + 45;
+
+    try b.mulWrap(a.toConst(), a.toConst(), .signed, testbits);
+    try c.sqr(a.toConst());
+    try c.truncate(c.toConst(), .signed, testbits);
+
+    try testing.expect(b.eq(c));
+}
+
 test "big.int div single-single no rem" {
     var a = try Managed.initSet(testing.allocator, 50);
     defer a.deinit();
@@ -1623,7 +1701,6 @@ test "big.int saturate multi unsigned zero" {
     try testing.expect(a.eqZero());
 }
 
-
 test "big.int saturate multi unsigned" {
     var a = try Managed.initSet(testing.allocator, maxInt(Limb) << @bitSizeOf(DoubleLimb));
     defer a.deinit();
-- 
cgit v1.2.3


From 3dc47b8161f681baef2be776056fa9cc23a8811d Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Mon, 4 Oct 2021 00:22:03 +0200
Subject: Apply new big int wrap/saturate to Value.zig

---
 lib/std/math/big/int.zig |  2 +-
 src/value.zig            | 78 ++++++++++++++++++++++++++++++++----------------
 2 files changed, 54 insertions(+), 26 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index bc19b652d8..4d778f1d22 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -2738,7 +2738,7 @@ pub fn llcmp(a: []const Limb, b: []const Limb) i8 {
 /// The result is computed modulo `r.len`. When `r.len >= a.len + b.len`, no overflow occurs.
 fn llmulaccLong(comptime op: AccOp, r: []Limb, a: []const Limb, b: []const Limb) void {
     @setRuntimeSafety(debug_safety);
-    assert(r.len >= a.len + b.len);
+    assert(r.len >= a.len);
     assert(a.len >= b.len);
 
     var i: usize = 0;
diff --git a/src/value.zig b/src/value.zig
index 0815c241e4..f5d7b8b05e 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -1669,7 +1669,7 @@ pub const Value = extern union {
         const rhs_bigint = rhs.toBigInt(&rhs_space);
         const limbs = try arena.alloc(
             std.math.big.Limb,
-            std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len),
+            std.math.big.int.calcTwosCompLimbCount(info.bits),
         );
         var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
         result_bigint.addWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits);
@@ -1701,7 +1701,7 @@ pub const Value = extern union {
         const rhs_bigint = rhs.toBigInt(&rhs_space);
         const limbs = try arena.alloc(
             std.math.big.Limb,
-            std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len),
+            std.math.big.int.calcTwosCompLimbCount(info.bits),
         );
         var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
         result_bigint.addSat(lhs_bigint, rhs_bigint, info.signedness, info.bits);
@@ -1736,7 +1736,7 @@ pub const Value = extern union {
         const rhs_bigint = rhs.toBigInt(&rhs_space);
         const limbs = try arena.alloc(
             std.math.big.Limb,
-            std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len),
+            std.math.big.int.calcTwosCompLimbCount(info.bits),
         );
         var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
         result_bigint.subWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits);
@@ -1768,7 +1768,7 @@ pub const Value = extern union {
         const rhs_bigint = rhs.toBigInt(&rhs_space);
         const limbs = try arena.alloc(
             std.math.big.Limb,
-            std.math.max(lhs_bigint.limbs.len, rhs_bigint.limbs.len),
+            std.math.big.int.calcTwosCompLimbCount(info.bits),
         );
         var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
         result_bigint.subSat(lhs_bigint, rhs_bigint, info.signedness, info.bits);
@@ -1794,19 +1794,31 @@ pub const Value = extern union {
         if (ty.isAnyFloat()) {
             return floatMul(lhs, rhs, ty, arena);
         }
-        const result = try intMul(lhs, rhs, arena);
 
-        const max = try ty.maxInt(arena, target);
-        if (compare(result, .gt, max, ty)) {
-            @panic("TODO comptime wrapping integer multiplication");
-        }
+        const info = ty.intInfo(target);
 
-        const min = try ty.minInt(arena, target);
-        if (compare(result, .lt, min, ty)) {
-            @panic("TODO comptime wrapping integer multiplication");
-        }
+        var lhs_space: Value.BigIntSpace = undefined;
+        var rhs_space: Value.BigIntSpace = undefined;
+        const lhs_bigint = lhs.toBigInt(&lhs_space);
+        const rhs_bigint = rhs.toBigInt(&rhs_space);
+        const limbs = try arena.alloc(
+            std.math.big.Limb,
+            std.math.big.int.calcTwosCompLimbCount(info.bits),
+        );
+        var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+        var limbs_buffer = try arena.alloc(
+            std.math.big.Limb,
+            std.math.big.int.calcMulWrapLimbsBufferLen(info.bits, lhs_bigint.limbs.len, rhs_bigint.limbs.len, 1),
+        );
+        defer arena.free(limbs_buffer);
+        result_bigint.mulWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits, limbs_buffer, arena);
+        const result_limbs = result_bigint.limbs[0..result_bigint.len];
 
-        return result;
+        if (result_bigint.positive) {
+            return Value.Tag.int_big_positive.create(arena, result_limbs);
+        } else {
+            return Value.Tag.int_big_negative.create(arena, result_limbs);
+        }
     }
 
     /// Supports integers only; asserts neither operand is undefined.
@@ -1820,19 +1832,35 @@ pub const Value = extern union {
         assert(!lhs.isUndef());
         assert(!rhs.isUndef());
 
-        const result = try intMul(lhs, rhs, arena);
+        const info = ty.intInfo(target);
 
-        const max = try ty.maxInt(arena, target);
-        if (compare(result, .gt, max, ty)) {
-            return max;
-        }
+        var lhs_space: Value.BigIntSpace = undefined;
+        var rhs_space: Value.BigIntSpace = undefined;
+        const lhs_bigint = lhs.toBigInt(&lhs_space);
+        const rhs_bigint = rhs.toBigInt(&rhs_space);
+        const limbs = try arena.alloc(
+            std.math.big.Limb,
+            std.math.max(
+                // For the saturate
+                std.math.big.int.calcTwosCompLimbCount(info.bits),
+                lhs_bigint.limbs.len + rhs_bigint.limbs.len,
+            ),
+        );
+        var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+        var limbs_buffer = try arena.alloc(
+            std.math.big.Limb,
+            std.math.big.int.calcMulLimbsBufferLen(lhs_bigint.limbs.len, rhs_bigint.limbs.len, 1),
+        );
+        defer arena.free(limbs_buffer);
+        result_bigint.mul(lhs_bigint, rhs_bigint, limbs_buffer, arena);
+        result_bigint.saturate(result_bigint.toConst(), info.signedness, info.bits);
+        const result_limbs = result_bigint.limbs[0..result_bigint.len];
 
-        const min = try ty.minInt(arena, target);
-        if (compare(result, .lt, min, ty)) {
-            return min;
+        if (result_bigint.positive) {
+            return Value.Tag.int_big_positive.create(arena, result_limbs);
+        } else {
+            return Value.Tag.int_big_negative.create(arena, result_limbs);
         }
-
-        return result;
     }
 
     /// Supports both floats and ints; handles undefined.
@@ -2144,7 +2172,7 @@ pub const Value = extern union {
         const rhs_bigint = rhs.toBigInt(&rhs_space);
         const limbs = try allocator.alloc(
             std.math.big.Limb,
-            lhs_bigint.limbs.len + rhs_bigint.limbs.len + 1,
+            lhs_bigint.limbs.len + rhs_bigint.limbs.len,
         );
         var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
         var limbs_buffer = try allocator.alloc(
-- 
cgit v1.2.3


From a1e02f33adab3ecdc85a321c4ff9c05762622c40 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Mon, 4 Oct 2021 03:54:13 +0200
Subject: fmt

---
 lib/std/math/big/int.zig | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 4d778f1d22..47fa286bdf 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -329,7 +329,7 @@ pub const Mutable = struct {
         r: *Mutable,
         limit: TwosCompIntLimit,
         signedness: std.builtin.Signedness,
-        bit_count: usize
+        bit_count: usize,
     ) void {
         // Handle zero-bit types.
         if (bit_count == 0) {
@@ -340,7 +340,7 @@ pub const Mutable = struct {
         const req_limbs = calcTwosCompLimbCount(bit_count);
         const bit = @truncate(Log2Limb, bit_count - 1);
         const signmask = @as(Limb, 1) << bit; // 0b0..010..0 where 1 is the sign bit.
-        const mask = (signmask << 1) -% 1;    // 0b0..011..1 where the leftmost 1 is the sign bit.
+        const mask = (signmask << 1) -% 1; // 0b0..011..1 where the leftmost 1 is the sign bit.
 
         r.positive = true;
 
@@ -349,7 +349,7 @@ pub const Mutable = struct {
                 .min => {
                     // Negative bound, signed = -0x80.
                     r.len = req_limbs;
-                    mem.set(Limb, r.limbs[0..r.len - 1], 0);
+                    mem.set(Limb, r.limbs[0 .. r.len - 1], 0);
                     r.limbs[r.len - 1] = signmask;
                     r.positive = false;
                 },
@@ -364,11 +364,11 @@ pub const Mutable = struct {
                     } else {
                         const new_req_limbs = calcTwosCompLimbCount(bit_count - 1);
                         const msb = @truncate(Log2Limb, bit_count - 2);
-                        const new_signmask = @as(Limb, 1) << msb;  // 0b0..010..0 where 1 is the sign bit.
+                        const new_signmask = @as(Limb, 1) << msb; // 0b0..010..0 where 1 is the sign bit.
                         const new_mask = (new_signmask << 1) -% 1; // 0b0..001..1 where the rightmost 0 is the sign bit.
 
                         r.len = new_req_limbs;
-                        std.mem.set(Limb, r.limbs[0..r.len - 1], maxInt(Limb));
+                        std.mem.set(Limb, r.limbs[0 .. r.len - 1], maxInt(Limb));
                         r.limbs[r.len - 1] = new_mask;
                     }
                 },
@@ -381,7 +381,7 @@ pub const Mutable = struct {
                 .max => {
                     // Max bound, unsigned = 0xFF
                     r.len = req_limbs;
-                    std.mem.set(Limb, r.limbs[0..r.len - 1], maxInt(Limb));
+                    std.mem.set(Limb, r.limbs[0 .. r.len - 1], maxInt(Limb));
                     r.limbs[r.len - 1] = mask;
                 },
             },
@@ -2052,7 +2052,7 @@ pub const Managed = struct {
         r: *Managed,
         limit: TwosCompIntLimit,
         signedness: std.builtin.Signedness,
-        bit_count: usize
+        bit_count: usize,
     ) !void {
         try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
         var m = r.toMutable();
@@ -2599,14 +2599,14 @@ fn llmulaccKaratsuba(
 
     mem.set(Limb, tmp[0..p2_limbs], 0);
     llmulacc(.add, allocator, tmp[0..p2_limbs], a1[0..math.min(a1.len, p2_limbs)], b1[0..math.min(b1.len, p2_limbs)]);
-    const p2 = tmp[0 .. llnormalize(tmp[0..p2_limbs])];
+    const p2 = tmp[0..llnormalize(tmp[0..p2_limbs])];
 
     // Add p2 * B to the result.
     llaccum(op, r[split..], p2);
 
     // Add p2 * B^2 to the result if required.
     if (limbs_after_split2 > 0) {
-        llaccum(op, r[split * 2..], p2[0..math.min(p2.len, limbs_after_split2)]);
+        llaccum(op, r[split * 2 ..], p2[0..math.min(p2.len, limbs_after_split2)]);
     }
 
     // Compute p0.
@@ -2614,7 +2614,7 @@ fn llmulaccKaratsuba(
     const p0_limbs = a0.len + b0.len;
     mem.set(Limb, tmp[0..p0_limbs], 0);
     llmulacc(.add, allocator, tmp[0..p0_limbs], a0, b0);
-    const p0 = tmp[0 .. llnormalize(tmp[0..p0_limbs])];
+    const p0 = tmp[0..llnormalize(tmp[0..p0_limbs])];
 
     // Add p0 to the result.
     llaccum(op, r, p0);
@@ -2622,7 +2622,6 @@ fn llmulaccKaratsuba(
     // Add p0 * B to the result. In this case, we may not need all of it.
     llaccum(op, r[split..], p0[0..math.min(limbs_after_split, p0.len)]);
 
-
     // Finally, compute and add p1.
     // From now on we only need `limbs_after_split` limbs for a0 and b0, since the result of the
     // following computation will be added * B.
@@ -2643,8 +2642,8 @@ fn llmulaccKaratsuba(
     // Note that in this case, we again need some storage for intermediary results
     // j0 and j1. Since we have tmp.len >= 2B, we can store both
     // intermediaries in the already allocated array.
-    const j0 = tmp[0..a.len - split];
-    const j1 = tmp[a.len - split..];
+    const j0 = tmp[0 .. a.len - split];
+    const j1 = tmp[a.len - split ..];
 
     // Ensure that no subtraction overflows.
     if (j0_sign == 1) {
-- 
cgit v1.2.3


From 95fe86e3dbd3826ca0971853b275a409ac215c83 Mon Sep 17 00:00:00 2001
From: Robin Voetter <robin@voetter.nl>
Date: Mon, 4 Oct 2021 11:25:10 +0200
Subject: big ints: Fix tests for 32-bit architectures

---
 lib/std/math/big/int_test.zig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 7d745aa203..ef3d61e616 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -1020,7 +1020,7 @@ test "big.int mulWrap multi-multi signed" {
 
     var c = try Managed.init(testing.allocator);
     defer c.deinit();
-    try c.mulWrap(a.toConst(), b.toConst(), .signed, 128);
+    try c.mulWrap(a.toConst(), b.toConst(), .signed, @bitSizeOf(SignedDoubleLimb));
 
     try testing.expect((try c.to(SignedDoubleLimb)) == minInt(SignedDoubleLimb) + 2);
 }
@@ -1603,9 +1603,9 @@ test "big.int truncate multi to single unsigned" {
     var a = try Managed.initSet(testing.allocator, (maxInt(Limb) + 1) | 0x1234_5678_9ABC_DEF0);
     defer a.deinit();
 
-    try a.truncate(a.toConst(), .unsigned, 37);
+    try a.truncate(a.toConst(), .unsigned, 27);
 
-    try testing.expect((try a.to(u37)) == 0x18_9ABC_DEF0);
+    try testing.expect((try a.to(u27)) == 0x2BC_DEF0);
 }
 
 test "big.int truncate multi to single signed" {
-- 
cgit v1.2.3