diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2022-10-11 05:51:47 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-10-11 05:51:47 -0400 |
| commit | c3d67c5c4e2e86b472ae046f5e0e39db61009213 (patch) | |
| tree | ee9c747780c625a4fc18badecd868ef67cf7d51f /lib/std/math/ldexp.zig | |
| parent | b316c25cc6f5b1703d7912da16c5c987f4406451 (diff) | |
| parent | a06185f3620953b8402b26a12e80aee12cd9ac8d (diff) | |
| download | zig-c3d67c5c4e2e86b472ae046f5e0e39db61009213.tar.gz zig-c3d67c5c4e2e86b472ae046f5e0e39db61009213.zip | |
Merge pull request #13117 from topolarity/compiler-rt-cmul
compiler-rt: Implement complex multiply/division
Diffstat (limited to 'lib/std/math/ldexp.zig')
| -rw-r--r-- | lib/std/math/ldexp.zig | 189 |
1 files changed, 123 insertions, 66 deletions
diff --git a/lib/std/math/ldexp.zig b/lib/std/math/ldexp.zig index 57d8896c9c..d2fd8db9b7 100644 --- a/lib/std/math/ldexp.zig +++ b/lib/std/math/ldexp.zig @@ -1,91 +1,148 @@ -// Ported from musl, which is licensed under the MIT license: -// https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT -// -// https://git.musl-libc.org/cgit/musl/tree/src/math/ldexpf.c -// https://git.musl-libc.org/cgit/musl/tree/src/math/ldexp.c - const std = @import("std"); const math = std.math; +const Log2Int = std.math.Log2Int; const assert = std.debug.assert; const expect = std.testing.expect; /// Returns x * 2^n. pub fn ldexp(x: anytype, n: i32) @TypeOf(x) { - var base = x; - var shift = n; - - const T = @TypeOf(base); + const T = @TypeOf(x); const TBits = std.meta.Int(.unsigned, @typeInfo(T).Float.bits); + const exponent_bits = math.floatExponentBits(T); const mantissa_bits = math.floatMantissaBits(T); - const exponent_min = math.floatExponentMin(T); - const exponent_max = math.floatExponentMax(T); - - const exponent_bias = exponent_max; - - // fix double rounding errors in subnormal ranges - // https://git.musl-libc.org/cgit/musl/commit/src/math/ldexp.c?id=8c44a060243f04283ca68dad199aab90336141db - const scale_min_expo = exponent_min + mantissa_bits + 1; - const scale_min = @bitCast(T, @as(TBits, scale_min_expo + exponent_bias) << mantissa_bits); - const scale_max = @bitCast(T, @intCast(TBits, exponent_max + exponent_bias) << mantissa_bits); - - // scale `shift` within floating point limits, if possible - // second pass is possible due to subnormal range - // third pass always results in +/-0.0 or +/-inf - if (shift > exponent_max) { - base *= scale_max; - shift -= exponent_max; - if (shift > exponent_max) { - base *= scale_max; - shift -= exponent_max; - if (shift > exponent_max) shift = exponent_max; + const fractional_bits = math.floatFractionalBits(T); + + const max_biased_exponent = 2 * math.floatExponentMax(T); + const mantissa_mask = @as(TBits, (1 << mantissa_bits) - 1); + + const repr = @bitCast(TBits, x); + const sign_bit = repr & (1 << (exponent_bits + mantissa_bits)); + + if (math.isNan(x) or !math.isFinite(x)) + return x; + + var exponent: i32 = @intCast(i32, (repr << 1) >> (mantissa_bits + 1)); + if (exponent == 0) + exponent += (@as(i32, exponent_bits) + @boolToInt(T == f80)) - @clz(repr << 1); + + if (n >= 0) { + if (n > max_biased_exponent - exponent) { + // Overflow. Return +/- inf + return @bitCast(T, @bitCast(TBits, math.inf(T)) | sign_bit); + } else if (exponent + n <= 0) { + // Result is subnormal + return @bitCast(T, (repr << @intCast(Log2Int(TBits), n)) | sign_bit); + } else if (exponent <= 0) { + // Result is normal, but needs shifting + var result = @intCast(TBits, n + exponent) << mantissa_bits; + result |= (repr << @intCast(Log2Int(TBits), 1 - exponent)) & mantissa_mask; + return @bitCast(T, result | sign_bit); } - } else if (shift < exponent_min) { - base *= scale_min; - shift -= scale_min_expo; - if (shift < exponent_min) { - base *= scale_min; - shift -= scale_min_expo; - if (shift < exponent_min) shift = exponent_min; + + // Result needs no shifting + return @bitCast(T, repr + (@intCast(TBits, n) << mantissa_bits)); + } else { + if (n <= -exponent) { + if (n < -(mantissa_bits + exponent)) + return @bitCast(T, sign_bit); // Severe underflow. Return +/- 0 + + // Result underflowed, we need to shift and round + const shift = @intCast(Log2Int(TBits), math.min(-n, -(exponent + n) + 1)); + const exact_tie: bool = @ctz(repr) == shift - 1; + var result = repr & mantissa_mask; + + if (T != f80) // Include integer bit + result |= @as(TBits, @boolToInt(exponent > 0)) << fractional_bits; + result = @intCast(TBits, (result >> (shift - 1))); + + // Round result, including round-to-even for exact ties + result = ((result + 1) >> 1) & ~@as(TBits, @boolToInt(exact_tie)); + return @bitCast(T, result | sign_bit); } - } - return base * @bitCast(T, @intCast(TBits, shift + exponent_bias) << mantissa_bits); + // Result is exact, and needs no shifting + return @bitCast(T, repr - (@intCast(TBits, -n) << mantissa_bits)); + } } test "math.ldexp" { - // TODO derive the various constants here with new maths API - - // basic usage - try expect(ldexp(@as(f16, 1.5), 4) == 24.0); - try expect(ldexp(@as(f32, 1.5), 4) == 24.0); - try expect(ldexp(@as(f64, 1.5), 4) == 24.0); - try expect(ldexp(@as(f128, 1.5), 4) == 24.0); // subnormals - try expect(math.isNormal(ldexp(@as(f16, 1.0), -14))); - try expect(!math.isNormal(ldexp(@as(f16, 1.0), -15))); - try expect(math.isNormal(ldexp(@as(f32, 1.0), -126))); - try expect(!math.isNormal(ldexp(@as(f32, 1.0), -127))); - try expect(math.isNormal(ldexp(@as(f64, 1.0), -1022))); - try expect(!math.isNormal(ldexp(@as(f64, 1.0), -1023))); - try expect(math.isNormal(ldexp(@as(f128, 1.0), -16382))); - try expect(!math.isNormal(ldexp(@as(f128, 1.0), -16383))); - // unreliable due to lack of native f16 support, see talk on PR #8733 - // try expect(ldexp(@as(f16, 0x1.1FFp-1), -14 - 9) == math.floatTrueMin(f16)); + try expect(ldexp(@as(f16, 0x1.1FFp14), -14 - 9 - 15) == math.floatTrueMin(f16)); try expect(ldexp(@as(f32, 0x1.3FFFFFp-1), -126 - 22) == math.floatTrueMin(f32)); try expect(ldexp(@as(f64, 0x1.7FFFFFFFFFFFFp-1), -1022 - 51) == math.floatTrueMin(f64)); + try expect(ldexp(@as(f80, 0x1.7FFFFFFFFFFFFFFEp-1), -16382 - 62) == math.floatTrueMin(f80)); try expect(ldexp(@as(f128, 0x1.7FFFFFFFFFFFFFFFFFFFFFFFFFFFp-1), -16382 - 111) == math.floatTrueMin(f128)); - // float limits try expect(ldexp(math.floatMax(f32), -128 - 149) > 0.0); try expect(ldexp(math.floatMax(f32), -128 - 149 - 1) == 0.0); - try expect(!math.isPositiveInf(ldexp(math.floatTrueMin(f16), 15 + 24))); - try expect(math.isPositiveInf(ldexp(math.floatTrueMin(f16), 15 + 24 + 1))); - try expect(!math.isPositiveInf(ldexp(math.floatTrueMin(f32), 127 + 149))); - try expect(math.isPositiveInf(ldexp(math.floatTrueMin(f32), 127 + 149 + 1))); - try expect(!math.isPositiveInf(ldexp(math.floatTrueMin(f64), 1023 + 1074))); - try expect(math.isPositiveInf(ldexp(math.floatTrueMin(f64), 1023 + 1074 + 1))); - try expect(!math.isPositiveInf(ldexp(math.floatTrueMin(f128), 16383 + 16494))); - try expect(math.isPositiveInf(ldexp(math.floatTrueMin(f128), 16383 + 16494 + 1))); + + @setEvalBranchQuota(10_000); + + inline for ([_]type{ f16, f32, f64, f80, f128 }) |T| { + const fractional_bits = math.floatFractionalBits(T); + + const min_exponent = math.floatExponentMin(T); + const max_exponent = math.floatExponentMax(T); + const exponent_bias = max_exponent; + + // basic usage + try expect(ldexp(@as(T, 1.5), 4) == 24.0); + + // normals -> subnormals + try expect(math.isNormal(ldexp(@as(T, 1.0), min_exponent))); + try expect(!math.isNormal(ldexp(@as(T, 1.0), min_exponent - 1))); + + // normals -> zero + try expect(ldexp(@as(T, 1.0), min_exponent - fractional_bits) > 0.0); + try expect(ldexp(@as(T, 1.0), min_exponent - fractional_bits - 1) == 0.0); + + // subnormals -> zero + try expect(ldexp(math.floatTrueMin(T), 0) > 0.0); + try expect(ldexp(math.floatTrueMin(T), -1) == 0.0); + + // Multiplications might flush the denormals to zero, esp. at + // runtime, so we manually construct the constants here instead. + const Z = std.meta.Int(.unsigned, @bitSizeOf(T)); + const EightTimesTrueMin = @bitCast(T, @as(Z, 8)); + const TwoTimesTrueMin = @bitCast(T, @as(Z, 2)); + + // subnormals -> subnormals + try expect(ldexp(math.floatTrueMin(T), 3) == EightTimesTrueMin); + try expect(ldexp(EightTimesTrueMin, -2) == TwoTimesTrueMin); + try expect(ldexp(EightTimesTrueMin, -3) == math.floatTrueMin(T)); + + // subnormals -> normals (+) + try expect(ldexp(math.floatTrueMin(T), fractional_bits) == math.floatMin(T)); + try expect(ldexp(math.floatTrueMin(T), fractional_bits - 1) == math.floatMin(T) * 0.5); + + // subnormals -> normals (-) + try expect(ldexp(-math.floatTrueMin(T), fractional_bits) == -math.floatMin(T)); + try expect(ldexp(-math.floatTrueMin(T), fractional_bits - 1) == -math.floatMin(T) * 0.5); + + // subnormals -> float limits (+inf) + try expect(math.isFinite(ldexp(math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits - 1))); + try expect(ldexp(math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits) == math.inf(T)); + + // subnormals -> float limits (-inf) + try expect(math.isFinite(ldexp(-math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits - 1))); + try expect(ldexp(-math.floatTrueMin(T), max_exponent + exponent_bias + fractional_bits) == -math.inf(T)); + + // infinity -> infinity + try expect(ldexp(math.inf(T), math.maxInt(i32)) == math.inf(T)); + try expect(ldexp(math.inf(T), math.minInt(i32)) == math.inf(T)); + try expect(ldexp(math.inf(T), max_exponent) == math.inf(T)); + try expect(ldexp(math.inf(T), min_exponent) == math.inf(T)); + try expect(ldexp(-math.inf(T), math.maxInt(i32)) == -math.inf(T)); + try expect(ldexp(-math.inf(T), math.minInt(i32)) == -math.inf(T)); + + // extremely large n + try expect(ldexp(math.floatMax(T), math.maxInt(i32)) == math.inf(T)); + try expect(ldexp(math.floatMax(T), -math.maxInt(i32)) == 0.0); + try expect(ldexp(math.floatMax(T), math.minInt(i32)) == 0.0); + try expect(ldexp(math.floatTrueMin(T), math.maxInt(i32)) == math.inf(T)); + try expect(ldexp(math.floatTrueMin(T), -math.maxInt(i32)) == 0.0); + try expect(ldexp(math.floatTrueMin(T), math.minInt(i32)) == 0.0); + } } |
