add @sqrt built-in function

See #767
author: Andrew Kelley <superjoe30@gmail.com> 2018-04-15 13:21:52 -0400
committer: Andrew Kelley <superjoe30@gmail.com> 2018-04-15 13:26:58 -0400
commit: b5459eb987d89c4759c31123a7baa0a0d962c024 (patch)
tree: 4a825823adb391e54c48f7664579ad085c75724b /std
parent: 4a2bfec150ac8b78185d98324782da7841eddb9b (diff)
download: zig-b5459eb987d89c4759c31123a7baa0a0d962c024.tar.gz
zig-b5459eb987d89c4759c31123a7baa0a0d962c024.zip
3 files changed, 242 insertions, 277 deletions
diff --git a/std/math/sqrt.zig b/std/math/sqrt.zig
index 690f8b6901..982bd28b72 100644
--- a/std/math/sqrt.zig
+++ b/std/math/sqrt.zig
@@ -14,26 +14,8 @@ const TypeId = builtin.TypeId;
 pub fn sqrt(x: var) (if (@typeId(@typeOf(x)) == TypeId.Int) @IntType(false, @typeOf(x).bit_count / 2) else @typeOf(x)) {
     const T = @typeOf(x);
     switch (@typeId(T)) {
-        TypeId.FloatLiteral => {
-            return T(sqrt64(x));
-        },
-        TypeId.Float => {
-            switch (T) {
-                f32 => {
-                    switch (builtin.arch) {
-                        builtin.Arch.x86_64 => return @import("x86_64/sqrt.zig").sqrt32(x),
-                        else => return sqrt32(x),
-                    }
-                },
-                f64 => {
-                    switch (builtin.arch) {
-                        builtin.Arch.x86_64 => return @import("x86_64/sqrt.zig").sqrt64(x),
-                        else => return sqrt64(x),
-                    }
-                },
-                else => @compileError("sqrt not implemented for " ++ @typeName(T)),
-            }
-        },
+        TypeId.FloatLiteral => return T(@sqrt(f64, x)), // TODO upgrade to f128
+        TypeId.Float => return @sqrt(T, x),
         TypeId.IntLiteral => comptime {
             if (x > @maxValue(u128)) {
                 @compileError("sqrt not implemented for comptime_int greater than 128 bits");
@@ -43,269 +25,58 @@ pub fn sqrt(x: var) (if (@typeId(@typeOf(x)) == TypeId.Int) @IntType(false, @typ
             }
             return T(sqrt_int(u128, x));
         },
-        TypeId.Int => {
-            return sqrt_int(T, x);
-        },
+        TypeId.Int => return sqrt_int(T, x),
         else => @compileError("sqrt not implemented for " ++ @typeName(T)),
     }
 }
 
-fn sqrt32(x: f32) f32 {
-    const tiny: f32 = 1.0e-30;
-    const sign: i32 = @bitCast(i32, u32(0x80000000));
-    var ix: i32 = @bitCast(i32, x);
-
-    if ((ix & 0x7F800000) == 0x7F800000) {
-        return x * x + x;   // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = snan
-    }
-
-    // zero
-    if (ix <= 0) {
-        if (ix & ~sign == 0) {
-            return x;       // sqrt (+-0) = +-0
-        }
-        if (ix < 0) {
-            return math.snan(f32);
-        }
-    }
-
-    // normalize
-    var m = ix >> 23;
-    if (m == 0) {
-        // subnormal
-        var i: i32 = 0;
-        while (ix & 0x00800000 == 0) : (i += 1) {
-            ix <<= 1;
-        }
-        m -= i - 1;
-    }
-
-    m -= 127;               // unbias exponent
-    ix = (ix & 0x007FFFFF) | 0x00800000;
-
-    if (m & 1 != 0) {       // odd m, double x to even
-        ix += ix;
-    }
-
-    m >>= 1;                // m = [m / 2]
-
-    // sqrt(x) bit by bit
-    ix += ix;
-    var q: i32 = 0;              // q = sqrt(x)
-    var s: i32 = 0;
-    var r: i32 = 0x01000000;     // r = moving bit right -> left
-
-    while (r != 0) {
-        const t = s + r;
-        if (t <= ix) {
-            s = t + r;
-            ix -= t;
-            q += r;
-        }
-        ix += ix;
-        r >>= 1;
-    }
-
-    // floating add to find rounding direction
-    if (ix != 0) {
-        var z = 1.0 - tiny;     // inexact
-        if (z >= 1.0) {
-            z = 1.0 + tiny;
-            if (z > 1.0) {
-                q += 2;
-            } else {
-                if (q & 1 != 0) {
-                    q += 1;
-                }
-            }
-        }
-    }
-
-    ix = (q >> 1) + 0x3f000000;
-    ix += m << 23;
-    return @bitCast(f32, ix);
-}
-
-// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound
-// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are
-// potentially some edge cases remaining that are not handled in the same way.
-fn sqrt64(x: f64) f64 {
-    const tiny: f64 = 1.0e-300;
-    const sign: u32 = 0x80000000;
-    const u = @bitCast(u64, x);
-
-    var ix0 = u32(u >> 32);
-    var ix1 = u32(u & 0xFFFFFFFF);
-
-    // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan
-    if (ix0 & 0x7FF00000 == 0x7FF00000) {
-        return x * x + x;
-    }
-
-    // sqrt(+-0) = +-0
-    if (x == 0.0) {
-        return x;
-    }
-    // sqrt(-ve) = snan
-    if (ix0 & sign != 0) {
-        return math.snan(f64);
-    }
-
-    // normalize x
-    var m = i32(ix0 >> 20);
-    if (m == 0) {
-        // subnormal
-        while (ix0 == 0) {
-            m -= 21;
-            ix0 |= ix1 >> 11;
-            ix1 <<= 21;
-        }
-
-        // subnormal
-        var i: u32 = 0;
-        while (ix0 & 0x00100000 == 0) : (i += 1) {
-            ix0 <<= 1;
-        }
-        m -= i32(i) - 1;
-        ix0 |= ix1 >> u5(32 - i);
-        ix1 <<= u5(i);
-    }
-
-    // unbias exponent
-    m -= 1023;
-    ix0 = (ix0 & 0x000FFFFF) | 0x00100000;
-    if (m & 1 != 0) {
-        ix0 += ix0 + (ix1 >> 31);
-        ix1 = ix1 +% ix1;
-    }
-    m >>= 1;
-
-    // sqrt(x) bit by bit
-    ix0 += ix0 + (ix1 >> 31);
-    ix1 = ix1 +% ix1;
-
-    var q: u32 = 0;
-    var q1: u32 = 0;
-    var s0: u32 = 0;
-    var s1: u32 = 0;
-    var r: u32 = 0x00200000;
-    var t: u32 = undefined;
-    var t1: u32 = undefined;
-
-    while (r != 0) {
-        t = s0 +% r;
-        if (t <= ix0) {
-            s0 = t + r;
-            ix0 -= t;
-            q += r;
-        }
-        ix0 = ix0 +% ix0 +% (ix1 >> 31);
-        ix1 = ix1 +% ix1;
-        r >>= 1;
-    }
-
-    r = sign;
-    while (r != 0) {
-        t = s1 +% r;
-        t = s0;
-        if (t < ix0 or (t == ix0 and t1 <= ix1)) {
-            s1 = t1 +% r;
-            if (t1 & sign == sign and s1 & sign == 0) {
-                s0 += 1;
-            }
-            ix0 -= t;
-            if (ix1 < t1) {
-                ix0 -= 1;
-            }
-            ix1 = ix1 -% t1;
-            q1 += r;
-        }
-        ix0 = ix0 +% ix0 +% (ix1 >> 31);
-        ix1 = ix1 +% ix1;
-        r >>= 1;
-    }
-
-    // rounding direction
-    if (ix0 | ix1 != 0) {
-        var z = 1.0 - tiny;   // raise inexact
-        if (z >= 1.0) {
-            z = 1.0 + tiny;
-            if (q1 == 0xFFFFFFFF) {
-                q1 = 0;
-                q += 1;
-            } else if (z > 1.0) {
-                if (q1 == 0xFFFFFFFE) {
-                    q += 1;
-                }
-                q1 += 2;
-            } else {
-                q1 += q1 & 1;
-            }
-        }
-    }
-
-    ix0 = (q >> 1) + 0x3FE00000;
-    ix1 = q1 >> 1;
-    if (q & 1 != 0) {
-        ix1 |= 0x80000000;
-    }
-
-    // NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same
-    // behaviour at least.
-    var iix0 = i32(ix0);
-    iix0 = iix0 +% (m << 20);
-
-    const uz = (u64(iix0) << 32) | ix1;
-    return @bitCast(f64, uz);
-}
-
 test "math.sqrt" {
-    assert(sqrt(f32(0.0)) == sqrt32(0.0));
-    assert(sqrt(f64(0.0)) == sqrt64(0.0));
+    assert(sqrt(f32(0.0)) == @sqrt(f32, 0.0));
+    assert(sqrt(f64(0.0)) == @sqrt(f64, 0.0));
 }
 
 test "math.sqrt32" {
     const epsilon = 0.000001;
 
-    assert(sqrt32(0.0) == 0.0);
-    assert(math.approxEq(f32, sqrt32(2.0), 1.414214, epsilon));
-    assert(math.approxEq(f32, sqrt32(3.6), 1.897367, epsilon));
-    assert(sqrt32(4.0) == 2.0);
-    assert(math.approxEq(f32, sqrt32(7.539840), 2.745877, epsilon));
-    assert(math.approxEq(f32, sqrt32(19.230934), 4.385309, epsilon));
-    assert(sqrt32(64.0) == 8.0);
-    assert(math.approxEq(f32, sqrt32(64.1), 8.006248, epsilon));
-    assert(math.approxEq(f32, sqrt32(8942.230469), 94.563370, epsilon));
+    assert(@sqrt(f32, 0.0) == 0.0);
+    assert(math.approxEq(f32, @sqrt(f32, 2.0), 1.414214, epsilon));
+    assert(math.approxEq(f32, @sqrt(f32, 3.6), 1.897367, epsilon));
+    assert(@sqrt(f32, 4.0) == 2.0);
+    assert(math.approxEq(f32, @sqrt(f32, 7.539840), 2.745877, epsilon));
+    assert(math.approxEq(f32, @sqrt(f32, 19.230934), 4.385309, epsilon));
+    assert(@sqrt(f32, 64.0) == 8.0);
+    assert(math.approxEq(f32, @sqrt(f32, 64.1), 8.006248, epsilon));
+    assert(math.approxEq(f32, @sqrt(f32, 8942.230469), 94.563370, epsilon));
 }
 
 test "math.sqrt64" {
     const epsilon = 0.000001;
 
-    assert(sqrt64(0.0) == 0.0);
-    assert(math.approxEq(f64, sqrt64(2.0), 1.414214, epsilon));
-    assert(math.approxEq(f64, sqrt64(3.6), 1.897367, epsilon));
-    assert(sqrt64(4.0) == 2.0);
-    assert(math.approxEq(f64, sqrt64(7.539840), 2.745877, epsilon));
-    assert(math.approxEq(f64, sqrt64(19.230934), 4.385309, epsilon));
-    assert(sqrt64(64.0) == 8.0);
-    assert(math.approxEq(f64, sqrt64(64.1), 8.006248, epsilon));
-    assert(math.approxEq(f64, sqrt64(8942.230469), 94.563367, epsilon));
+    assert(@sqrt(f64, 0.0) == 0.0);
+    assert(math.approxEq(f64, @sqrt(f64, 2.0), 1.414214, epsilon));
+    assert(math.approxEq(f64, @sqrt(f64, 3.6), 1.897367, epsilon));
+    assert(@sqrt(f64, 4.0) == 2.0);
+    assert(math.approxEq(f64, @sqrt(f64, 7.539840), 2.745877, epsilon));
+    assert(math.approxEq(f64, @sqrt(f64, 19.230934), 4.385309, epsilon));
+    assert(@sqrt(f64, 64.0) == 8.0);
+    assert(math.approxEq(f64, @sqrt(f64, 64.1), 8.006248, epsilon));
+    assert(math.approxEq(f64, @sqrt(f64, 8942.230469), 94.563367, epsilon));
 }
 
 test "math.sqrt32.special" {
-    assert(math.isPositiveInf(sqrt32(math.inf(f32))));
-    assert(sqrt32(0.0) == 0.0);
-    assert(sqrt32(-0.0) == -0.0);
-    assert(math.isNan(sqrt32(-1.0)));
-    assert(math.isNan(sqrt32(math.nan(f32))));
+    assert(math.isPositiveInf(@sqrt(f32, math.inf(f32))));
+    assert(@sqrt(f32, 0.0) == 0.0);
+    assert(@sqrt(f32, -0.0) == -0.0);
+    assert(math.isNan(@sqrt(f32, -1.0)));
+    assert(math.isNan(@sqrt(f32, math.nan(f32))));
 }
 
 test "math.sqrt64.special" {
-    assert(math.isPositiveInf(sqrt64(math.inf(f64))));
-    assert(sqrt64(0.0) == 0.0);
-    assert(sqrt64(-0.0) == -0.0);
-    assert(math.isNan(sqrt64(-1.0)));
-    assert(math.isNan(sqrt64(math.nan(f64))));
+    assert(math.isPositiveInf(@sqrt(f64, math.inf(f64))));
+    assert(@sqrt(f64, 0.0) == 0.0);
+    assert(@sqrt(f64, -0.0) == -0.0);
+    assert(math.isNan(@sqrt(f64, -1.0)));
+    assert(math.isNan(@sqrt(f64, math.nan(f64))));
 }
 
 fn sqrt_int(comptime T: type, value: T) @IntType(false, T.bit_count / 2) {
diff --git a/std/math/x86_64/sqrt.zig b/std/math/x86_64/sqrt.zig
deleted file mode 100644
index ad9ce0c96c..0000000000
--- a/std/math/x86_64/sqrt.zig
+++ /dev/null
@@ -1,15 +0,0 @@
-pub fn sqrt32(x: f32) f32 {
-    return asm (
-        \\sqrtss %%xmm0, %%xmm0
-        : [ret] "={xmm0}" (-> f32)
-        : [x] "{xmm0}" (x)
-    );
-}
-
-pub fn sqrt64(x: f64) f64 {
-    return asm (
-        \\sqrtsd %%xmm0, %%xmm0
-        : [ret] "={xmm0}" (-> f64)
-        : [x] "{xmm0}" (x)
-    );
-}
diff --git a/std/special/builtin.zig b/std/special/builtin.zig
index ac6eefe3d9..56aa2ebaf8 100644
--- a/std/special/builtin.zig
+++ b/std/special/builtin.zig
@@ -194,3 +194,212 @@ fn isNan(comptime T: type, bits: T) bool {
         unreachable;
     }
 }
+
+// NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound
+// behaviour. Most intermediate i32 values are changed to u32 where appropriate but there are
+// potentially some edge cases remaining that are not handled in the same way.
+export fn sqrt(x: f64) f64 {
+    const tiny: f64 = 1.0e-300;
+    const sign: u32 = 0x80000000;
+    const u = @bitCast(u64, x);
+
+    var ix0 = u32(u >> 32);
+    var ix1 = u32(u & 0xFFFFFFFF);
+
+    // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = nan
+    if (ix0 & 0x7FF00000 == 0x7FF00000) {
+        return x * x + x;
+    }
+
+    // sqrt(+-0) = +-0
+    if (x == 0.0) {
+        return x;
+    }
+    // sqrt(-ve) = snan
+    if (ix0 & sign != 0) {
+        return math.snan(f64);
+    }
+
+    // normalize x
+    var m = i32(ix0 >> 20);
+    if (m == 0) {
+        // subnormal
+        while (ix0 == 0) {
+            m -= 21;
+            ix0 |= ix1 >> 11;
+            ix1 <<= 21;
+        }
+
+        // subnormal
+        var i: u32 = 0;
+        while (ix0 & 0x00100000 == 0) : (i += 1) {
+            ix0 <<= 1;
+        }
+        m -= i32(i) - 1;
+        ix0 |= ix1 >> u5(32 - i);
+        ix1 <<= u5(i);
+    }
+
+    // unbias exponent
+    m -= 1023;
+    ix0 = (ix0 & 0x000FFFFF) | 0x00100000;
+    if (m & 1 != 0) {
+        ix0 += ix0 + (ix1 >> 31);
+        ix1 = ix1 +% ix1;
+    }
+    m >>= 1;
+
+    // sqrt(x) bit by bit
+    ix0 += ix0 + (ix1 >> 31);
+    ix1 = ix1 +% ix1;
+
+    var q: u32 = 0;
+    var q1: u32 = 0;
+    var s0: u32 = 0;
+    var s1: u32 = 0;
+    var r: u32 = 0x00200000;
+    var t: u32 = undefined;
+    var t1: u32 = undefined;
+
+    while (r != 0) {
+        t = s0 +% r;
+        if (t <= ix0) {
+            s0 = t + r;
+            ix0 -= t;
+            q += r;
+        }
+        ix0 = ix0 +% ix0 +% (ix1 >> 31);
+        ix1 = ix1 +% ix1;
+        r >>= 1;
+    }
+
+    r = sign;
+    while (r != 0) {
+        t = s1 +% r;
+        t = s0;
+        if (t < ix0 or (t == ix0 and t1 <= ix1)) {
+            s1 = t1 +% r;
+            if (t1 & sign == sign and s1 & sign == 0) {
+                s0 += 1;
+            }
+            ix0 -= t;
+            if (ix1 < t1) {
+                ix0 -= 1;
+            }
+            ix1 = ix1 -% t1;
+            q1 += r;
+        }
+        ix0 = ix0 +% ix0 +% (ix1 >> 31);
+        ix1 = ix1 +% ix1;
+        r >>= 1;
+    }
+
+    // rounding direction
+    if (ix0 | ix1 != 0) {
+        var z = 1.0 - tiny;   // raise inexact
+        if (z >= 1.0) {
+            z = 1.0 + tiny;
+            if (q1 == 0xFFFFFFFF) {
+                q1 = 0;
+                q += 1;
+            } else if (z > 1.0) {
+                if (q1 == 0xFFFFFFFE) {
+                    q += 1;
+                }
+                q1 += 2;
+            } else {
+                q1 += q1 & 1;
+            }
+        }
+    }
+
+    ix0 = (q >> 1) + 0x3FE00000;
+    ix1 = q1 >> 1;
+    if (q & 1 != 0) {
+        ix1 |= 0x80000000;
+    }
+
+    // NOTE: musl here appears to rely on signed twos-complement wraparound. +% has the same
+    // behaviour at least.
+    var iix0 = i32(ix0);
+    iix0 = iix0 +% (m << 20);
+
+    const uz = (u64(iix0) << 32) | ix1;
+    return @bitCast(f64, uz);
+}
+
+export fn sqrtf(x: f32) f32 {
+    const tiny: f32 = 1.0e-30;
+    const sign: i32 = @bitCast(i32, u32(0x80000000));
+    var ix: i32 = @bitCast(i32, x);
+
+    if ((ix & 0x7F800000) == 0x7F800000) {
+        return x * x + x;   // sqrt(nan) = nan, sqrt(+inf) = +inf, sqrt(-inf) = snan
+    }
+
+    // zero
+    if (ix <= 0) {
+        if (ix & ~sign == 0) {
+            return x;       // sqrt (+-0) = +-0
+        }
+        if (ix < 0) {
+            return math.snan(f32);
+        }
+    }
+
+    // normalize
+    var m = ix >> 23;
+    if (m == 0) {
+        // subnormal
+        var i: i32 = 0;
+        while (ix & 0x00800000 == 0) : (i += 1) {
+            ix <<= 1;
+        }
+        m -= i - 1;
+    }
+
+    m -= 127;               // unbias exponent
+    ix = (ix & 0x007FFFFF) | 0x00800000;
+
+    if (m & 1 != 0) {       // odd m, double x to even
+        ix += ix;
+    }
+
+    m >>= 1;                // m = [m / 2]
+
+    // sqrt(x) bit by bit
+    ix += ix;
+    var q: i32 = 0;              // q = sqrt(x)
+    var s: i32 = 0;
+    var r: i32 = 0x01000000;     // r = moving bit right -> left
+
+    while (r != 0) {
+        const t = s + r;
+        if (t <= ix) {
+            s = t + r;
+            ix -= t;
+            q += r;
+        }
+        ix += ix;
+        r >>= 1;
+    }
+
+    // floating add to find rounding direction
+    if (ix != 0) {
+        var z = 1.0 - tiny;     // inexact
+        if (z >= 1.0) {
+            z = 1.0 + tiny;
+            if (z > 1.0) {
+                q += 2;
+            } else {
+                if (q & 1 != 0) {
+                    q += 1;
+                }
+            }
+        }
+    }
+
+    ix = (q >> 1) + 0x3f000000;
+    ix += m << 23;
+    return @bitCast(f32, ix);
+}
author	Andrew Kelley <superjoe30@gmail.com>	2018-04-15 13:21:52 -0400
committer	Andrew Kelley <superjoe30@gmail.com>	2018-04-15 13:26:58 -0400
commit	b5459eb987d89c4759c31123a7baa0a0d962c024 (patch)
tree	4a825823adb391e54c48f7664579ad085c75724b /std
parent	4a2bfec150ac8b78185d98324782da7841eddb9b (diff)
download	zig-b5459eb987d89c4759c31123a7baa0a0d962c024.tar.gz zig-b5459eb987d89c4759c31123a7baa0a0d962c024.zip