diff options
| author | Loris Cro <kappaloris@gmail.com> | 2023-06-18 09:06:40 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-06-18 09:06:40 +0200 |
| commit | 216ef10dc471e4db60a30208be178d6c59efeaaf (patch) | |
| tree | 8c239dab283ae9cb3b7fe099bae240bcc53f894e /lib/compiler_rt | |
| parent | 0fc1d396495c1ab482197021dedac8bea3f9401c (diff) | |
| parent | 729a051e9e38674233190aea23c0ac8c134f2d67 (diff) | |
| download | zig-216ef10dc471e4db60a30208be178d6c59efeaaf.tar.gz zig-216ef10dc471e4db60a30208be178d6c59efeaaf.zip | |
Merge branch 'master' into autodoc-searchkey
Diffstat (limited to 'lib/compiler_rt')
| -rw-r--r-- | lib/compiler_rt/README.md | 804 | ||||
| -rw-r--r-- | lib/compiler_rt/arm.zig | 8 | ||||
| -rw-r--r-- | lib/compiler_rt/clear_cache.zig | 2 | ||||
| -rw-r--r-- | lib/compiler_rt/divc3.zig | 3 | ||||
| -rw-r--r-- | lib/compiler_rt/divtc3.zig | 6 | ||||
| -rw-r--r-- | lib/compiler_rt/emutls.zig | 4 | ||||
| -rw-r--r-- | lib/compiler_rt/fixtfti.zig | 2 | ||||
| -rw-r--r-- | lib/compiler_rt/fixunstfti.zig | 2 | ||||
| -rw-r--r-- | lib/compiler_rt/floattitf.zig | 2 | ||||
| -rw-r--r-- | lib/compiler_rt/multc3.zig | 6 | ||||
| -rw-r--r-- | lib/compiler_rt/negtf2.zig | 6 | ||||
| -rw-r--r-- | lib/compiler_rt/powiXf2.zig | 6 | ||||
| -rw-r--r-- | lib/compiler_rt/powiXf2_test.zig | 465 | ||||
| -rw-r--r-- | lib/compiler_rt/udivmod.zig | 308 |
14 files changed, 397 insertions, 1227 deletions
diff --git a/lib/compiler_rt/README.md b/lib/compiler_rt/README.md deleted file mode 100644 index 47c0e7978a..0000000000 --- a/lib/compiler_rt/README.md +++ /dev/null @@ -1,804 +0,0 @@ -If hardware lacks basic or specialized functionality, compiler-rt adds such functionality -for basic arithmetic(s). -One such example is 64-bit integer multiplication on 32-bit x86. - -Goals: -1. zig as linker for object files produced by other compilers - => `function compatibility` to compiler-rt and libgcc for same-named functions - * compatibility conflict between compiler-rt and libgcc: prefer compiler-rt -2. `symbol-level compatibility` low-priority compared to emitted calls by llvm - * symbol-level compatibility: libgcc even lower priority -3. add zig-specific language runtime features, see #7265 - * example: arbitrary bit width integer arithmetic - * lower to call those functions for e.g. multiplying two i12345 numbers together - * proper naming + documentation for standardizing (allow languages to follow our example) - -Current status (tracking libgcc documentation): -- Integer library routines => almost implemented -- Soft float library routines => finished -- Decimal float library routines => unimplemented (~120 functions) -- Fixed-point fractional library routines => unimplemented (~300 functions) -- Exception handling routines => unclear, if supported (~32+x undocumented functions) -- Miscellaneous routines => unclear, if supported (cache control and stack function) -- No zig-specific language runtime features in compiler-rt yet - -This library is automatically built as-needed for the compilation target and -then statically linked and therefore is a transparent dependency for the -programmer. -For details see `../compiler_rt.zig`. - -Bugs should be solved by trying to duplicate the bug upstream, if possible. - * If the bug exists upstream, get it fixed upstream and port the fix downstream to Zig. 
- * If the bug only exists in Zig, use the corresponding C code and debug - both implementations side by side to figure out what is wrong. - -Routines with status are given below. Sources were besides -"The Art of Computer Programming" by Donald E. Knuth, "HackersDelight" by Henry S. Warren, -"Bit Twiddling Hacks" collected by Sean Eron Anderson, "Berkeley SoftFloat" by John R. Hauser, -LLVM "compiler-rt" as it was MIT-licensed, "musl libc" and thoughts + work of contributors. - -The compiler-rt routines have not yet been audited. -See https://github.com/ziglang/zig/issues/1504. - -From left to right the columns mean 1. if the routine is implemented (✗ or ✓), -2. the name, 3. input (`a`), 4. input (`b`), 5. return value, -6. an explanation of the functionality, .. to repeat the comment from the -column a row above and/or additional return values. -Some routines have more extensive comments supplemented with a reference text. - -Integer and Float Operations - -| Done | Name | a | b | Out | Comment | -| ------ | ------------- | ---- | ---- | ---- | ------------------------------ | -| | | | | | **Integer Bit Operations** | -| ✓ | __clzsi2 | u32 | ∅ | i32 | count leading zeros | -| ✓ | __clzdi2 | u64 | ∅ | i32 | count leading zeros | -| ✓ | __clzti2 | u128 | ∅ | i32 | count leading zeros | -| ✓ | __ctzsi2 | u32 | ∅ | i32 | count trailing zeros | -| ✓ | __ctzdi2 | u64 | ∅ | i32 | count trailing zeros | -| ✓ | __ctzti2 | u128 | ∅ | i32 | count trailing zeros | -| ✓ | __ffssi2 | u32 | ∅ | i32 | find least significant 1 bit | -| ✓ | __ffsdi2 | u64 | ∅ | i32 | find least significant 1 bit | -| ✓ | __ffsti2 | u128 | ∅ | i32 | find least significant 1 bit | -| ✓ | __paritysi2 | u32 | ∅ | i32 | bit parity | -| ✓ | __paritydi2 | u64 | ∅ | i32 | bit parity | -| ✓ | __parityti2 | u128 | ∅ | i32 | bit parity | -| ✓ | __popcountsi2 | u32 | ∅ | i32 | bit population | -| ✓ | __popcountdi2 | u64 | ∅ | i32 | bit population | -| ✓ | __popcountti2 | u128 | ∅ | i32 | bit population | -| ✓ 
| __bswapsi2 | u32 | ∅ | i32 | byte swap | -| ✓ | __bswapdi2 | u64 | ∅ | i32 | byte swap | -| ✓ | __bswapti2 | u128 | ∅ | i32 | byte swap | -| | | | | | **Integer Comparison** | -| ✓ | __cmpsi2 | i32 | i32 | i32 | `(a<b) -> 0, (a==b) -> 1, (a>b) -> 2` | -| ✓ | __cmpdi2 | i64 | i64 | i32 | .. | -| ✗ | __aeabi_lcmp | i64 | i64 | i32 | .. ARM | -| ✓ | __cmpti2 | i128 | i128 | i32 | .. | -| ✓ | __ucmpsi2 | u32 | u32 | i32 | `(a<b) -> 0, (a==b) -> 1, (a>b) -> 2` | -| ✓ | __ucmpdi2 | u64 | u64 | i32 | .. | -| ✗ | __aeabi_ulcmp | u64 | u64 | i32 | .. ARM | -| ✓ | __ucmpti2 | u128 | u128 | i32 | .. | -| | | | | | **Integer Arithmetic** | -| ✓ | __ashlsi3 | i32 | i32 | i32 | `a << b` [^unused_rl78] | -| ✓ | __ashldi3 | i64 | i32 | i64 | .. | -| ✓ | __ashlti3 | i128 | i32 | i128 | .. | -| ✓ | __aeabi_llsl | i32 | i32 | i32 | .. ARM | -| ✓ | __ashrsi3 | i32 | i32 | i32 | `a >> b` arithmetic (sign fill) [^unused_rl78] | -| ✓ | __ashrdi3 | i64 | i32 | i64 | .. | -| ✓ | __ashrti3 | i128 | i32 | i128 | .. | -| ✓ | __aeabi_lasr | i64 | i32 | i64 | .. ARM | -| ✓ | __lshrsi3 | i32 | i32 | i32 | `a >> b` logical (zero fill) [^unused_rl78] | -| ✓ | __lshrdi3 | i64 | i32 | i64 | .. | -| ✓ | __lshrti3 | i128 | i32 | i128 | .. | -| ✓ | __aeabi_llsr | i64 | i32 | i64 | .. ARM | -| ✓ | __negsi2 | i32 | i32 | i32 | `-a` [^libgcc_compat] | -| ✓ | __negdi2 | i64 | i64 | i64 | .. | -| ✓ | __negti2 | i128 | i128 | i128 | .. | -| ✓ | __mulsi3 | i32 | i32 | i32 | `a * b` | -| ✓ | __muldi3 | i64 | i64 | i64 | .. | -| ✓ | __aeabi_lmul | i64 | i64 | i64 | .. ARM | -| ✓ | __multi3 | i128 | i128 | i128 | .. | -| ✓ | __divsi3 | i32 | i32 | i32 | `a / b` | -| ✓ | __divdi3 | i64 | i64 | i64 | .. | -| ✓ | __divti3 | i128 | i128 | i128 | .. | -| ✓ | __aeabi_idiv | i32 | i32 | i32 | .. ARM | -| ✓ | __udivsi3 | u32 | u32 | u32 | `a / b` | -| ✓ | __udivdi3 | u64 | u64 | u64 | .. | -| ✓ | __udivti3 | u128 | u128 | u128 | .. | -| ✓ | __aeabi_uidiv | i32 | i32 | i32 | .. 
ARM | -| ✓ | __modsi3 | i32 | i32 | i32 | `a % b` | -| ✓ | __moddi3 | i64 | i64 | i64 | .. | -| ✓ | __modti3 | i128 | i128 | i128 | .. | -| ✓ | __umodsi3 | u32 | u32 | u32 | `a % b` | -| ✓ | __umoddi3 | u64 | u64 | u64 | .. | -| ✓ | __umodti3 | u128 | u128 | u128 | .. | -| ✓ | __udivmodsi4 | u32 | u32 | u32 | `a / b, rem.* = a % b` | -| ✓ | __aeabi_uidivmod | u32 | u32 | u32 | .. ARM | -| ✓ | __udivmoddi4 | u64 | u64 | u64 | .. | -| ✓ | __aeabi_uldivmod | u64 | u64 | u64 | .. ARM | -| ✓ | __udivmodti4 | u128 | u128 | u128 | .. | -| ✓ | __divmodsi4 | i32 | i32 | i32 | `a / b, rem.* = a % b` | -| ✓ | __aeabi_idivmod | i32 | i32 | i32 | .. ARM | -| ✓ | __divmoddi4 | i64 | i64 | i64 | .. | -| ✓ | __aeabi_ldivmod | i64 | i64 | i64 | .. ARM | -| ✓ | __divmodti4 | i128 | i128 | i128 | .. [^libgcc_compat] | -| | | | | | **Integer Arithmetic with Trapping Overflow**| -| ✓ | __absvsi2 | i32 | i32 | i32 | abs(a) | -| ✓ | __absvdi2 | i64 | i64 | i64 | .. | -| ✓ | __absvti2 | i128 | i128 | i128 | .. | -| ✓ | __negvsi2 | i32 | i32 | i32 | `-a` [^libgcc_compat] | -| ✓ | __negvdi2 | i64 | i64 | i64 | .. | -| ✓ | __negvti2 | i128 | i128 | i128 | .. | -| ✗ | __addvsi3 | i32 | i32 | i32 | `a + b` | -| ✗ | __addvdi3 | i64 | i64 | i64 | .. | -| ✗ | __addvti3 | i128 | i128 | i128 | .. | -| ✗ | __subvsi3 | i32 | i32 | i32 | `a - b` | -| ✗ | __subvdi3 | i64 | i64 | i64 | .. | -| ✗ | __subvti3 | i128 | i128 | i128 | .. | -| ✗ | __mulvsi3 | i32 | i32 | i32 | `a * b` | -| ✗ | __mulvdi3 | i64 | i64 | i64 | .. | -| ✗ | __mulvti3 | i128 | i128 | i128 | .. | -| | | | | | **Integer Arithmetic which Return on Overflow** [^noptr_faster] | -| ✓ | __addosi4 | i32 | i32 | i32 | `a + b`, overflow->ov.*=1 else 0 [^perf_addition] | -| ✓ | __addodi4 | i64 | i64 | i64 | .. | -| ✓ | __addoti4 | i128 | i128 | i128 | .. | -| ✓ | __subosi4 | i32 | i32 | i32 | `a - b`, overflow->ov.*=1 else 0 [^perf_addition] | -| ✓ | __subodi4 | i64 | i64 | i64 | .. | -| ✓ | __suboti4 | i128 | i128 | i128 | .. 
| -| ✓ | __mulosi4 | i32 | i32 | i32 | `a * b`, overflow->ov.*=1 else 0 | -| ✓ | __mulodi4 | i64 | i64 | i64 | .. | -| ✓ | __muloti4 | i128 | i128 | i128 | .. | -| | | | | | **Float Conversion** | -| ✓ | __extendhfdf2 | f16 | ∅ | f32 | .. | -| ✓ | __extendsfdf2 | f32 | ∅ | f64 | .. | -| ✓ | __aeabi_f2d | f32 | ∅ | f64 | .. | -| ✓ | __extendsftf2 | f32 | ∅ | f128 | .. | -| ✓ | __extendsfxf2 | f32 | ∅ | f80 | .. | -| ✓ | __extenddftf2 | f64 | ∅ | f128 | .. | -| ✓ | __extenddfxf2 | f64 | ∅ | f80 | .. | -| ✗ | __aeabi_h2f | f16 | ∅ | f32 | .. ARM | -| ✗ | __aeabi_h2f_alt | f16 | ∅ | f32 | .. ARM alternate [^VFPv3alt] | -| ✓ | __gnu_h2f_ieee | f16 | ∅ | f32 | .. GNU naming convention | -| ✓ | __truncsfhf2 | f32 | ∅ | f16 | rounding towards zero | -| ✓ | __truncdfhf2 | f64 | ∅ | f16 | .. | -| ✓ | __truncdfsf2 | f64 | ∅ | f32 | .. | -| ✓ | __trunctfhf2 | f128 | ∅ | f16 | .. | -| ✓ | __trunctfsf2 | f128 | ∅ | f32 | .. | -| ✓ | __trunctfdf2 | f128 | ∅ | f64 | .. | -| ✓ | __trunctfxf2 | f128 | ∅ | f80 | .. | -| ✓ | __truncxfhf2 | f80 | ∅ | f16 | .. | -| ✓ | __truncxfsf2 | f80 | ∅ | f32 | .. | -| ✓ | __truncxfdf2 | f80 | ∅ | f64 | .. | -| ✗ | __aeabi_f2h | f32 | ∅ | f16 | .. ARM | -| ✗ | __aeabi_f2h_alt | f32 | ∅ | f16 | .. ARM alternate [^VFPv3alt] | -| ✓ | __gnu_f2h_ieee | f32 | ∅ | f16 | .. GNU naming convention | -| ✓ | __aeabi_d2h | f64 | ∅ | f16 | .. ARM | -| ✗ | __aeabi_d2h_alt | f64 | ∅ | f16 | .. ARM alternate [^VFPv3alt] | -| ✓ | __aeabi_d2f | f64 | ∅ | f32 | .. ARM | -| ✓ | __trunckfsf2 | f128 | ∅ | f32 | .. PPC | -| ✓ | _Qp_qtos |*f128 | ∅ | f32 | .. SPARC | -| ✓ | __trunckfdf2 | f128 | ∅ | f64 | .. PPC | -| ✓ | _Qp_qtod |*f128 | ∅ | f64 | .. SPARC | -| ✓ | __fixhfsi | f16 | ∅ | i32 | float to int, rounding towards zero | -| ✓ | __fixsfsi | f32 | ∅ | i32 | .. | -| ✓ | __aeabi_f2iz | f32 | ∅ | i32 | .. ARM | -| ✓ | __fixdfsi | f64 | ∅ | i32 | .. | -| ✓ | __aeabi_d2iz | f64 | ∅ | i32 | .. ARM | -| ✓ | __fixtfsi | f128 | ∅ | i32 | .. 
| -| ✓ | __fixxfsi | f80 | ∅ | i32 | .. | -| ✓ | __fixhfdi | f16 | ∅ | i64 | .. | -| ✓ | __fixsfdi | f32 | ∅ | i64 | .. | -| ✓ | __aeabi_f2lz | f32 | ∅ | i64 | .. ARM | -| ✓ | __fixdfdi | f64 | ∅ | i64 | .. | -| ✓ | __aeabi_d2lz | f64 | ∅ | i64 | .. ARM | -| ✓ | __fixtfdi | f128 | ∅ | i64 | .. | -| ✓ | __fixxfdi | f80 | ∅ | i64 | .. | -| ✓ | __fixhfti | f16 | ∅ | i128 | .. | -| ✓ | __fixsfti | f32 | ∅ | i128 | .. | -| ✓ | __fixdfti | f64 | ∅ | i128 | .. | -| ✓ | __fixtfti | f128 | ∅ | i128 | .. | -| ✓ | __fixxfti | f80 | ∅ | i128 | .. | -| ✓ | __fixunshfsi | f16 | ∅ | u32 | float to uint, rounding towards zero. negative values become 0. | -| ✓ | __fixunssfsi | f32 | ∅ | u32 | .. | -| ✓ | __aeabi_f2uiz | f32 | ∅ | u32 | .. ARM | -| ✓ | __fixunsdfsi | f64 | ∅ | u32 | .. | -| ✓ | __aeabi_d2uiz | f64 | ∅ | u32 | .. ARM | -| ✓ | __fixunstfsi | f128 | ∅ | u32 | .. | -| ✓ | __fixunsxfsi | f80 | ∅ | u32 | .. | -| ✓ | __fixunshfdi | f16 | ∅ | u64 | .. | -| ✓ | __fixunssfdi | f32 | ∅ | u64 | .. | -| ✓ | __aeabi_f2ulz | f32 | ∅ | u64 | .. ARM | -| ✓ | __fixunsdfdi | f64 | ∅ | u64 | .. | -| ✓ | __aeabi_d2ulz | f64 | ∅ | u64 | .. ARM | -| ✓ | __fixunstfdi | f128 | ∅ | u64 | .. | -| ✓ | __fixunsxfdi | f80 | ∅ | u64 | .. | -| ✓ | __fixunshfti | f16 | ∅ | u128 | .. | -| ✓ | __fixunssfti | f32 | ∅ | u128 | .. | -| ✓ | __fixunsdfti | f64 | ∅ | u128 | .. | -| ✓ | __fixunstfti | f128 | ∅ | u128 | .. | -| ✓ | __fixunsxfti | f80 | ∅ | u128 | .. | -| ✓ | __floatsihf | i32 | ∅ | f16 | int to float | -| ✓ | __floatsisf | i32 | ∅ | f32 | .. | -| ✓ | __aeabi_i2f | i32 | ∅ | f32 | .. ARM | -| ✓ | __floatsidf | i32 | ∅ | f64 | .. | -| ✓ | __aeabi_i2d | i32 | ∅ | f64 | .. ARM | -| ✓ | __floatsitf | i32 | ∅ | f128 | .. | -| ✓ | __floatsixf | i32 | ∅ | f80 | .. | -| ✓ | __floatdisf | i64 | ∅ | f32 | .. | -| ✓ | __aeabi_l2f | i64 | ∅ | f32 | .. ARM | -| ✓ | __floatdidf | i64 | ∅ | f64 | .. | -| ✓ | __aeabi_l2d | i64 | ∅ | f64 | .. ARM | -| ✓ | __floatditf | i64 | ∅ | f128 | .. 
| -| ✓ | __floatdixf | i64 | ∅ | f80 | .. | -| ✓ | __floattihf | i128 | ∅ | f16 | .. | -| ✓ | __floattisf | i128 | ∅ | f32 | .. | -| ✓ | __floattidf | i128 | ∅ | f64 | .. | -| ✓ | __floattitf | i128 | ∅ | f128 | .. | -| ✓ | __floattixf | i128 | ∅ | f80 | .. | -| ✓ | __floatunsihf | u32 | ∅ | f16 | uint to float | -| ✓ | __floatunsisf | u32 | ∅ | f32 | .. | -| ✓ | __aeabi_ui2f | u32 | ∅ | f32 | .. ARM | -| ✓ | __floatunsidf | u32 | ∅ | f64 | .. | -| ✓ | __aeabi_ui2d | u32 | ∅ | f64 | .. ARM | -| ✓ | __floatunsitf | u32 | ∅ | f128 | .. | -| ✓ | __floatunsixf | u32 | ∅ | f80 | .. | -| ✓ | __floatundihf | u64 | ∅ | f16 | .. | -| ✓ | __floatundisf | u64 | ∅ | f32 | .. | -| ✓ | __aeabi_ul2f | u64 | ∅ | f32 | .. ARM | -| ✓ | __floatundidf | u64 | ∅ | f64 | .. | -| ✓ | __aeabi_ul2d | u64 | ∅ | f64 | .. ARM | -| ✓ | __floatunditf | u64 | ∅ | f128 | .. | -| ✓ | __floatundixf | u64 | ∅ | f80 | .. | -| ✓ | __floatuntihf | u128 | ∅ | f16 | .. | -| ✓ | __floatuntisf | u128 | ∅ | f32 | .. | -| ✓ | __floatuntidf | u128 | ∅ | f64 | .. | -| ✓ | __floatuntitf | u128 | ∅ | f128 | .. | -| ✓ | __floatuntixf | u128 | ∅ | f80 | .. | -| | | | | | **Float Comparison** | -| ✓ | __cmphf2 | f16 | f16 | i32 | `(a<b)->-1, (a==b)->0, (a>b)->1, Nan->1` | -| ✓ | __cmpsf2 | f32 | f32 | i32 | exported from __lesf2, __ledf2, __letf2 (below) | -| ✓ | __cmpdf2 | f64 | f64 | i32 | But: if NaN is a possibility, use another routine. | -| ✓ | __cmptf2 | f128 | f128 | i32 | .. | -| ✓ | __cmpxf2 | f80 | f80 | i32 | .. | -| ✓ | _Qp_cmp |*f128 |*f128 | i32 | .. SPARC | -| ✓ | __unordhf2 | f16 | f16 | i32 | `(a==+-NaN or b==+-NaN) -> !=0, else -> 0` | -| ✓ | __unordsf2 | f32 | f32 | i32 | .. | -| ✓ | __unorddf2 | f64 | f64 | i32 | Note: only reliable for (input!=NaN) | -| ✓ | __unordtf2 | f128 | f128 | i32 | .. | -| ✓ | __unordxf2 | f80 | f80 | i32 | .. | -| ✓ | __aeabi_fcmpun | f32 | f32 | i32 | .. ARM | -| ✓ | __aeabi_dcmpun | f32 | f32 | i32 | .. ARM | -| ✓ | __unordkf2 | f128 | f128 | i32 | .. 
PPC | -| ✓ | __eqhf2 | f16 | f16 | i32 | `(a!=NaN) and (b!=Nan) and (a==b) -> output=0` | -| ✓ | __eqsf2 | f32 | f32 | i32 | .. | -| ✓ | __eqdf2 | f64 | f64 | i32 | .. | -| ✓ | __eqtf2 | f128 | f128 | i32 | .. | -| ✓ | __eqxf2 | f80 | f80 | i32 | .. | -| ✓ | __aeabi_fcmpeq | f32 | f32 | i32 | .. ARM | -| ✓ | __aeabi_dcmpeq | f32 | f32 | i32 | .. ARM | -| ✓ | __eqkf2 | f128 | f128 | i32 | .. PPC | -| ✓ | _Qp_feq |*f128 |*f128 | bool | .. SPARC | -| ✓ | __nehf2 | f16 | f16 | i32 | `(a==NaN) or (b==Nan) or (a!=b) -> output!=0` | -| ✓ | __nesf2 | f32 | f32 | i32 | Note: __eqXf2 and __neXf2 have same return value | -| ✓ | __nedf2 | f64 | f64 | i32 | .. | -| ✓ | __netf2 | f128 | f128 | i32 | .. | -| ✓ | __nexf2 | f80 | f80 | i32 | .. | -| ✓ | __nekf2 | f128 | f128 | i32 | .. PPC | -| ✓ | _Qp_fne |*f128 |*f128 | bool | .. SPARC | -| ✓ | __gehf2 | f16 | f16 | i32 | `(a!=Nan) and (b!=Nan) and (a>=b) -> output>=0` | -| ✓ | __gesf2 | f32 | f32 | i32 | .. | -| ✓ | __gedf2 | f64 | f64 | i32 | .. | -| ✓ | __getf2 | f128 | f128 | i32 | .. | -| ✓ | __gexf2 | f80 | f80 | i32 | .. | -| ✓ | __aeabi_fcmpge | f32 | f32 | i32 | .. ARM | -| ✓ | __aeabi_dcmpge | f64 | f64 | i32 | .. ARM | -| ✓ | __gekf2 | f128 | f128 | i32 | .. PPC | -| ✓ | _Qp_fge |*f128 |*f128 | bool | .. SPARC | -| ✓ | __lthf2 | f16 | f16 | i32 | `(a!=Nan) and (b!=Nan) and (a<b) -> output<0` | -| ✓ | __ltsf2 | f32 | f32 | i32 | .. | -| ✓ | __ltdf2 | f64 | f64 | i32 | .. | -| ✓ | __lttf2 | f128 | f128 | i32 | .. | -| ✓ | __ltxf2 | f80 | f80 | i32 | .. | -| ✓ | __ltkf2 | f128 | f128 | i32 | .. PPC | -| ✓ | __aeabi_fcmplt | f32 | f32 | i32 | .. ARM | -| ✓ | __aeabi_dcmplt | f32 | f32 | i32 | .. ARM | -| ✓ | _Qp_flt |*f128 |*f128 | bool | .. SPARC | -| ✓ | __lehf2 | f16 | f16 | i32 | `(a!=Nan) and (b!=Nan) and (a<=b) -> output<=0` | -| ✓ | __lesf2 | f32 | f32 | i32 | .. | -| ✓ | __ledf2 | f64 | f64 | i32 | .. | -| ✓ | __letf2 | f128 | f128 | i32 | .. | -| ✓ | __lexf2 | f80 | f80 | i32 | .. 
| -| ✓ | __aeabi_fcmple | f32 | f32 | i32 | .. ARM | -| ✓ | __aeabi_dcmple | f32 | f32 | i32 | .. ARM | -| ✓ | __lekf2 | f128 | f128 | i32 | .. PPC | -| ✓ | _Qp_fle |*f128 |*f128 | bool | .. SPARC | -| ✓ | __gthf2 | f16 | f16 | i32 | `(a!=Nan) and (b!=Nan) and (a>b) -> output>0` | -| ✓ | __gtsf2 | f32 | f32 | i32 | .. | -| ✓ | __gtdf2 | f64 | f64 | i32 | .. | -| ✓ | __gttf2 | f128 | f128 | i32 | .. | -| ✓ | __gtxf2 | f80 | f80 | i32 | .. | -| ✓ | __aeabi_fcmpgt | f32 | f32 | i32 | .. ARM | -| ✓ | __aeabi_dcmpgt | f64 | f64 | i32 | .. ARM | -| ✓ | __gtkf2 | f128 | f128 | i32 | .. PPC | -| ✓ | _Qp_fgt |*f128 |*f128 | bool | .. SPARC | -| | | | | | **Float Arithmetic** | -| ✓ | __addhf3 | f32 | f32 | f32 | `a + b` | -| ✓ | __addsf3 | f32 | f32 | f32 | .. | -| ✓ | __adddf3 | f64 | f64 | f64 | .. | -| ✓ | __addtf3 | f128 | f128 | f128 | .. | -| ✓ | __addxf3 | f80 | f80 | f80 | .. | -| ✓ | __aeabi_fadd | f32 | f32 | f32 | .. ARM | -| ✓ | __aeabi_dadd | f64 | f64 | f64 | .. ARM | -| ✓ | __addkf3 | f128 | f128 | f128 | .. PPC | -| ✓ | _Qp_add |*f128 |*f128 | void | .. SPARC args *c,*a,*b c=a+b | -| ✓ | __subhf3 | f32 | f32 | f32 | `a - b` | -| ✓ | __subsf3 | f32 | f32 | f32 | .. | -| ✓ | __subdf3 | f64 | f64 | f64 | .. | -| ✓ | __subtf3 | f128 | f128 | f128 | .. | -| ✓ | __subxf3 | f80 | f80 | f80 | .. | -| ✓ | __aeabi_fsub | f32 | f32 | f32 | .. ARM | -| ✓ | __aeabi_dsub | f64 | f64 | f64 | .. ARM | -| ✓ | __subkf3 | f128 | f128 | f128 | .. PPC | -| ✓ | _Qp_sub |*f128 |*f128 | void | .. SPARC args *c,*a,*b c=a-b | -| ✓ | __mulhf3 | f32 | f32 | f32 | `a * b` | -| ✓ | __mulsf3 | f32 | f32 | f32 | .. | -| ✓ | __muldf3 | f64 | f64 | f64 | .. | -| ✓ | __multf3 | f128 | f128 | f128 | .. | -| ✓ | __mulxf3 | f80 | f80 | f80 | .. | -| ✓ | __aeabi_fmul | f32 | f32 | f32 | .. ARM | -| ✓ | __aeabi_dmul | f64 | f64 | f64 | .. ARM | -| ✓ | __mulkf3 | f128 | f128 | f128 | .. PPC | -| ✓ | _Qp_mul |*f128 |*f128 | void | .. 
SPARC args *c,*a,*b c=a*b | -| ✓ | __divsf3 | f32 | f32 | f32 | `a / b` | -| ✓ | __divdf3 | f64 | f64 | f64 | .. | -| ✓ | __divtf3 | f128 | f128 | f128 | .. | -| ✓ | __divxf3 | f80 | f80 | f80 | .. | -| ✓ | __aeabi_fdiv | f32 | f32 | f32 | .. ARM | -| ✓ | __aeabi_ddiv | f64 | f64 | f64 | .. ARM | -| ✓ | __divkf3 | f128 | f128 | f128 | .. PPC | -| ✓ | _Qp_div |*f128 |*f128 | void | .. SPARC args *c,*a,*b c=a/b | -| ✓ | __negsf2 | f32 | ∅ | f32[^unused_rl78] | -a (can be lowered directly to a xor) | -| ✓ | __negdf2 | f64 | ∅ | f64 | .. | -| ✓ | __negtf2 | f128 | ∅ | f128 | .. | -| ✓ | __negxf2 | f80 | ∅ | f80 | .. | -| | | | | | **Other** | -| ✓ | __powihf2 | f16 | i32 | f16 | `a ^ b` | -| ✓ | __powisf2 | f32 | i32 | f32 | .. | -| ✓ | __powidf2 | f64 | i32 | f64 | .. | -| ✓ | __powitf2 | f128 | i32 | f128 | .. | -| ✓ | __powixf2 | f80 | i32 | f80 | .. | -| ✓ | __mulhc3 | all4 | f16 | f16 | `(a+ib) * (c+id)` | -| ✓ | __mulsc3 | all4 | f32 | f32 | .. | -| ✓ | __muldc3 | all4 | f64 | f64 | .. | -| ✓ | __multc3 | all4 | f128 | f128 | .. | -| ✓ | __mulxc3 | all4 | f80 | f80 | .. | -| ✓ | __divhc3 | all4 | f16 | f16 | `(a+ib) / (c+id)` | -| ✓ | __divsc3 | all4 | f32 | f32 | .. | -| ✓ | __divdc3 | all4 | f64 | f64 | .. | -| ✓ | __divtc3 | all4 | f128 | f128 | .. | -| ✓ | __divxc3 | all4 | f80 | f80 | .. | - -[^unused_rl78]: Unused in LLVM, but used for example by rl78. -[^libgcc_compat]: Unused in backends and for symbol-level compatibility with libgcc. -[^noptr_faster]: Operations without pointer and without C struct semantics lead to better optimizations. -[^perf_addition]: Has better performance than standard method due to 2s complement semantics. -Not provided by LLVM and libgcc. -[^VFPv3alt]: Converts IEEE-format to VFPv3 alternative-format. - -Decimal float library routines - -BID means Binary Integer Decimal encoding, DPD means Densely Packed Decimal encoding. -BID should be only chosen for binary data, DPD for decimal data (ASCII, Unicode etc). 
-For example the number 0.2 is not accurately representable in binary data. - -| Done | Name | a | b | Out | Comment | -| ------ | ------------- | --------- | --------- | --------- | ---------------------------- | -| | | | | | **Decimal Float Conversion** | -| ✗ | __dpd_extendsddd2 | dec32 | ∅ | dec64 | conversion | -| ✗ | __bid_extendsddd2 | dec32 | ∅ | dec64 | .. | -| ✗ | __dpd_extendsdtd2 | dec32 | ∅ | dec128| .. | -| ✗ | __bid_extendsdtd2 | dec32 | ∅ | dec128| .. | -| ✗ | __dpd_extendddtd2 | dec64 | ∅ | dec128| .. | -| ✗ | __bid_extendddtd2 | dec64 | ∅ | dec128| .. | -| ✗ | __dpd_truncddsd2 | dec64 | ∅ | dec32 | .. | -| ✗ | __bid_truncddsd2 | dec64 | ∅ | dec32 | .. | -| ✗ | __dpd_trunctdsd2 | dec128 | ∅ | dec32 | .. | -| ✗ | __bid_trunctdsd2 | dec128 | ∅ | dec32 | .. | -| ✗ | __dpd_trunctddd2 | dec128 | ∅ | dec64 | .. | -| ✗ | __bid_trunctddd2 | dec128 | ∅ | dec64 | .. | -| ✗ | __dpd_extendsfdd | float | ∅ | dec64 | .. | -| ✗ | __bid_extendsfdd | float | ∅ | dec64 | .. | -| ✗ | __dpd_extendsftd | float | ∅ | dec128| .. | -| ✗ | __bid_extendsftd | float | ∅ | dec128| .. | -| ✗ | __dpd_extenddftd | double | ∅ | dec128| .. | -| ✗ | __bid_extenddftd | double | ∅ | dec128| .. | -| ✗ | __dpd_extendxftd |long double | ∅ | dec128| .. | -| ✗ | __bid_extendxftd |long double | ∅ | dec128| .. | -| ✗ | __dpd_truncdfsd | double | ∅ | dec32 | .. | -| ✗ | __bid_truncdfsd | double | ∅ | dec32 | .. | -| ✗ | __dpd_truncxfsd |long double | ∅ | dec32 | .. | -| ✗ | __bid_truncxfsd |long double | ∅ | dec32 | .. | -| ✗ | __dpd_trunctfsd |long double | ∅ | dec32 | .. | -| ✗ | __bid_trunctfsd |long double | ∅ | dec32 | .. | -| ✗ | __dpd_truncxfdd |long double | ∅ | dec64 | .. | -| ✗ | __bid_truncxfdd |long double | ∅ | dec64 | .. | -| ✗ | __dpd_trunctfdd |long double | ∅ | dec64 | .. | -| ✗ | __bid_trunctfdd |long double | ∅ | dec64 | .. | -| ✗ | __dpd_truncddsf | dec64 | ∅ | float | .. | -| ✗ | __bid_truncddsf | dec64 | ∅ | float | .. | -| ✗ | __dpd_trunctdsf | dec128 | ∅ | float | .. 
| -| ✗ | __bid_trunctdsf | dec128 | ∅ | float | .. | -| ✗ | __dpd_extendsddf | dec32 | ∅ | double| .. | -| ✗ | __bid_extendsddf | dec32 | ∅ | double| .. | -| ✗ | __dpd_trunctddf | dec128 | ∅ | double| .. | -| ✗ | __bid_trunctddf | dec128 | ∅ | double| .. | -| ✗ | __dpd_extendsdxf | dec32 | ∅ |long double| .. | -| ✗ | __bid_extendsdxf | dec32 | ∅ |long double| .. | -| ✗ | __dpd_extendddxf | dec64 | ∅ |long double| .. | -| ✗ | __bid_extendddxf | dec64 | ∅ |long double| .. | -| ✗ | __dpd_trunctdxf | dec128 | ∅ |long double| .. | -| ✗ | __bid_trunctdxf | dec128 | ∅ |long double| .. | -| ✗ | __dpd_extendsdtf | dec32 | ∅ |long double| .. | -| ✗ | __bid_extendsdtf | dec32 | ∅ |long double| .. | -| ✗ | __dpd_extendddtf | dec64 | ∅ |long double| .. | -| ✗ | __bid_extendddtf | dec64 | ∅ |long double| .. | -| ✗ | __dpd_extendsfsd | float | ∅ | dec32 | same size conversions | -| ✗ | __bid_extendsfsd | float | ∅ | dec32 | .. | -| ✗ | __dpd_extenddfdd | double | ∅ | dec64 | .. | -| ✗ | __bid_extenddfdd | double | ∅ | dec64 | .. | -| ✗ | __dpd_extendtftd |long double | ∅ | dec128| .. | -| ✗ | __bid_extendtftd |long double | ∅ | dec128| .. | -| ✗ | __dpd_truncsdsf | dec32 | ∅ | float | .. | -| ✗ | __bid_truncsdsf | dec32 | ∅ | float | .. | -| ✗ | __dpd_truncdddf | dec64 | ∅ | float | conversion | -| ✗ | __bid_truncdddf | dec64 | ∅ | float | .. | -| ✗ | __dpd_trunctdtf | dec128 | ∅ |long double| .. | -| ✗ | __bid_trunctdtf | dec128 | ∅ |long double| .. | -| ✗ | __dpd_fixsdsi | dec32 | ∅ | int | .. | -| ✗ | __bid_fixsdsi | dec32 | ∅ | int | .. | -| ✗ | __dpd_fixddsi | dec64 | ∅ | int | .. | -| ✗ | __bid_fixddsi | dec64 | ∅ | int | .. | -| ✗ | __dpd_fixtdsi | dec128 | ∅ | int | .. | -| ✗ | __bid_fixtdsi | dec128 | ∅ | int | .. | -| ✗ | __dpd_fixsddi | dec32 | ∅ | long | .. | -| ✗ | __bid_fixsddi | dec32 | ∅ | long | .. | -| ✗ | __dpd_fixdddi | dec64 | ∅ | long | .. | -| ✗ | __bid_fixdddi | dec64 | ∅ | long | .. | -| ✗ | __dpd_fixtddi | dec128 | ∅ | long | .. 
| -| ✗ | __bid_fixtddi | dec128 | ∅ | long | .. | -| ✗ | __dpd_fixunssdsi | dec32 | ∅ |unsigned int | .. All negative values become zero. | -| ✗ | __bid_fixunssdsi | dec32 | ∅ |unsigned int | .. | -| ✗ | __dpd_fixunsddsi | dec64 | ∅ |unsigned int | .. | -| ✗ | __bid_fixunsddsi | dec64 | ∅ |unsigned int | .. | -| ✗ | __dpd_fixunstdsi | dec128 | ∅ |unsigned int | .. | -| ✗ | __bid_fixunstdsi | dec128 | ∅ |unsigned int | .. | -| ✗ | __dpd_fixunssddi | dec32 | ∅ |unsigned long| .. | -| ✗ | __bid_fixunssddi | dec32 | ∅ |unsigned long| .. | -| ✗ | __dpd_fixunsdddi | dec64 | ∅ |unsigned long| .. | -| ✗ | __bid_fixunsdddi | dec64 | ∅ |unsigned long| .. | -| ✗ | __dpd_fixunstddi | dec128 | ∅ |unsigned long| .. | -| ✗ | __bid_fixunstddi | dec128 | ∅ |unsigned long| .. | -| ✗ | __dpd_floatsisd | int | ∅ | dec32 | .. | -| ✗ | __bid_floatsisd | int | ∅ | dec32 | .. | -| ✗ | __dpd_floatsidd | int | ∅ | dec64 | .. | -| ✗ | __bid_floatsidd | int | ∅ | dec64 | .. | -| ✗ | __dpd_floatsitd | int | ∅ | dec128 | .. | -| ✗ | __bid_floatsitd | int | ∅ | dec128 | .. | -| ✗ | __dpd_floatdisd | long | ∅ | dec32 | .. | -| ✗ | __bid_floatdisd | long | ∅ | dec32 | .. | -| ✗ | __dpd_floatdidd | long | ∅ | dec64 | .. | -| ✗ | __bid_floatdidd | long | ∅ | dec64 | .. | -| ✗ | __dpd_floatditd | long | ∅ | dec128 | .. | -| ✗ | __bid_floatditd | long | ∅ | dec128 | .. | -| ✗ | __dpd_floatunssisd | unsigned int| ∅ | dec32 | .. | -| ✗ | __bid_floatunssisd | unsigned int| ∅ | dec32 | .. | -| ✗ | __dpd_floatunssidd | unsigned int| ∅ | dec64 | .. | -| ✗ | __bid_floatunssidd | unsigned int| ∅ | dec64 | .. | -| ✗ | __dpd_floatunssitd | unsigned int| ∅ | dec128 | .. | -| ✗ | __bid_floatunssitd | unsigned int| ∅ | dec128 | .. | -| ✗ | __dpd_floatunsdisd |unsigned long| ∅ | dec32 | .. | -| ✗ | __bid_floatunsdisd |unsigned long| ∅ | dec32 | .. | -| ✗ | __dpd_floatunsdidd |unsigned long| ∅ | dec64 | .. | -| ✗ | __bid_floatunsdidd |unsigned long| ∅ | dec64 | .. 
| -| ✗ | __dpd_floatunsditd |unsigned long| ∅ | dec128 | .. | -| ✗ | __bid_floatunsditd |unsigned long| ∅ | dec128 | .. | -| | | | | | **Decimal Float Comparison** | -| ✗ | __dpd_unordsd2 | dec32 | dec32 | c_int | `a==+-NaN or b==+-NaN -> 1(nonzero), else -> 0` | -| ✗ | __bid_unordsd2 | dec32 | dec32 | c_int | .. | -| ✗ | __dpd_unorddd2 | dec64 | dec64 | c_int | .. | -| ✗ | __bid_unorddd2 | dec64 | dec64 | c_int | .. | -| ✗ | __dpd_unordtd2 | dec128 | dec128 | c_int | .. | -| ✗ | __bid_unordtd2 | dec128 | dec128 | c_int | .. | -| ✗ | __dpd_eqsd2 | dec32 | dec32 | c_int |`a!=+-NaN and b!=+-Nan and a==b -> 0, else -> 1(nonzero)`| -| ✗ | __bid_eqsd2 | dec32 | dec32 | c_int | .. | -| ✗ | __dpd_eqdd2 | dec64 | dec64 | c_int | .. | -| ✗ | __bid_eqdd2 | dec64 | dec64 | c_int | .. | -| ✗ | __dpd_eqtd2 | dec128 | dec128 | c_int | .. | -| ✗ | __bid_eqtd2 | dec128 | dec128 | c_int | .. | -| ✗ | __dpd_nesd2 | dec32 | dec32 | c_int | `a==+-NaN or b==+-NaN or a!=b -> 1(nonzero), else -> 0` | -| ✗ | __bid_nesd2 | dec32 | dec32 | c_int | .. | -| ✗ | __dpd_nedd2 | dec64 | dec64 | c_int | .. | -| ✗ | __bid_nedd2 | dec64 | dec64 | c_int | .. | -| ✗ | __dpd_netd2 | dec128 | dec128 | c_int | .. | -| ✗ | __bid_netd2 | dec128 | dec128 | c_int | .. | -| ✗ | __dpd_gesd2 | dec32 | dec32 | c_int | `a!=+-NaN and b!=+-NaN and a>=b -> >=0, else -> <0` | -| ✗ | __bid_gesd2 | dec32 | dec32 | c_int | .. | -| ✗ | __dpd_gedd2 | dec64 | dec64 | c_int | .. | -| ✗ | __bid_gedd2 | dec64 | dec64 | c_int | .. | -| ✗ | __dpd_getd2 | dec128 | dec128 | c_int | .. | -| ✗ | __bid_getd2 | dec128 | dec128 | c_int | .. | -| ✗ | __dpd_ltsd2 | dec32 | dec32 | c_int | `a!=+-NaN and b!=+-NaN and a<b -> <0, else -> >=0` | -| ✗ | __bid_ltsd2 | dec32 | dec32 | c_int | .. | -| ✗ | __dpd_ltdd2 | dec64 | dec64 | c_int | .. | -| ✗ | __bid_ltdd2 | dec64 | dec64 | c_int | .. | -| ✗ | __dpd_lttd2 | dec128 | dec128 | c_int | .. | -| ✗ | __bid_lttd2 | dec128 | dec128 | c_int | .. 
| -| ✗ | __dpd_lesd2 | dec32 | dec32 | c_int | `a!=+-NaN and b!=+-NaN and a<=b -> <=0, else -> >=0` | -| ✗ | __bid_lesd2 | dec32 | dec32 | c_int | .. | -| ✗ | __dpd_ledd2 | dec64 | dec64 | c_int | .. | -| ✗ | __bid_ledd2 | dec64 | dec64 | c_int | .. | -| ✗ | __dpd_letd2 | dec128 | dec128 | c_int | .. | -| ✗ | __bid_letd2 | dec128 | dec128 | c_int | .. | -| ✗ | __dpd_gtsd2 | dec32 | dec32 | c_int | `a!=+-NaN and b!=+-NaN and a>b -> >0, else -> <=0` | -| ✗ | __bid_gtsd2 | dec32 | dec32 | c_int | .. | -| ✗ | __dpd_gtdd2 | dec64 | dec64 | c_int | .. | -| ✗ | __bid_gtdd2 | dec64 | dec64 | c_int | .. | -| ✗ | __dpd_gttd2 | dec128 | dec128 | c_int | .. | -| ✗ | __bid_gttd2 | dec128 | dec128 | c_int | .. | -| | | | | | **Decimal Float Arithmetic**[^options] | -| ✗ | __dpd_addsd3 | dec32 | dec32 | dec32 |`a + b`| -| ✗ | __bid_addsd3 | dec32 | dec32 | dec32 | .. | -| ✗ | __dpd_adddd3 | dec64 | dec64 | dec64 | .. | -| ✗ | __bid_adddd3 | dec64 | dec64 | dec64 | .. | -| ✗ | __dpd_addtd3 | dec128 | dec128 | dec128 | .. | -| ✗ | __bid_addtd3 | dec128 | dec128 | dec128 | .. | -| ✗ | __dpd_subsd3 | dec32 | dec32 | dec32 |`a - b`| -| ✗ | __bid_subsd3 | dec32 | dec32 | dec32 | .. | -| ✗ | __dpd_subdd3 | dec64 | dec64 | dec64 | .. | -| ✗ | __bid_subdd3 | dec64 | dec64 | dec64 | .. | -| ✗ | __dpd_subtd3 | dec128 | dec128 | dec128 | .. | -| ✗ | __bid_subtd3 | dec128 | dec128 | dec128 | .. | -| ✗ | __dpd_mulsd3 | dec32 | dec32 | dec32 |`a * b`| -| ✗ | __bid_mulsd3 | dec32 | dec32 | dec32 | .. | -| ✗ | __dpd_muldd3 | dec64 | dec64 | dec64 | .. | -| ✗ | __bid_muldd3 | dec64 | dec64 | dec64 | .. | -| ✗ | __dpd_multd3 | dec128 | dec128 | dec128 | .. | -| ✗ | __bid_multd3 | dec128 | dec128 | dec128 | .. | -| ✗ | __dpd_divsd3 | dec32 | dec32 | dec32 |`a / b`| -| ✗ | __bid_divsd3 | dec32 | dec32 | dec32 | .. | -| ✗ | __dpd_divdd3 | dec64 | dec64 | dec64 | .. | -| ✗ | __bid_divdd3 | dec64 | dec64 | dec64 | .. | -| ✗ | __dpd_divtd3 | dec128 | dec128 | dec128 | .. 
| -| ✗ | __bid_divtd3 | dec128 | dec128 | dec128 | .. | -| ✗ | __dpd_negsd2 | dec32 | dec32 | dec32 | `-a` | -| ✗ | __bid_negsd2 | dec32 | dec32 | dec32 | .. | -| ✗ | __dpd_negdd2 | dec64 | dec64 | dec64 | .. | -| ✗ | __bid_negdd2 | dec64 | dec64 | dec64 | .. | -| ✗ | __dpd_negtd2 | dec128 | dec128 | dec128 | .. | -| ✗ | __bid_negtd2 | dec128 | dec128 | dec128 | .. | - -[^options]: These numbers include options with routines for +-0 and +-Nan. - -Fixed-point fractional library routines - -TODO brief explanation + implementation - -| Done | Name | a | b | Out | Comment | -| ------ | ------------- | --------- | --------- | --------- | -------------------------- | -| | | | | | **Fixed-Point Fractional** | - -Math functions according to C99 with gnu extension sincos. f16, f80 and f128 functions -are additionally supported by Zig, but not part of C standard. Alphabetically sorted. - -| Done | Name | a | b | Out | Comment | -| ---- | ------- | --------- | --------- | --------- | -------------------------- | -| ✓ | __ceilh | f16 | ∅ | f16 |smallest integer value not less than a| -| ✓ | ceilf | f32 | ∅ | f32 |If a is integer, +-0, +-NaN, or +-infinite, a itself is returned.| -| ✓ | ceil | f64 | ∅ | f64 | .. | -| ✓ | __ceilx | f80 | ∅ | f80 | | -| ✓ | ceilf128 | f128 | ∅ | f128 | .. PPC | -| ✓ | ceilq | f128 | ∅ | f128 | .. | -| ✓ | ceill |long double| ∅ |long double| .. | -| ✓ | __cosh | f16 | ∅ | f16 | `cos(a)=(e^(ia)+e^(-ia))/2`| -| ✓ | cosf | f32 | ∅ | f32 | .. | -| ✓ | cos | f64 | ∅ | f64 | .. | -| ✓ | __cosx | f80 | ∅ | f80 | .. | -| ✓ | cosf128 | f128 | ∅ | f128 | .. | -| ✓ | cosq | f128 | ∅ | f128 | .. PPC | -| ✓ | cosl |long double| ∅ |long double| .. | -| ✓ | __exph | f16 | ∅ | f16 | `e^a` with e base of natural logarithms| -| ✓ | expf | f32 | ∅ | f32 | .. | -| ✓ | exp | f64 | ∅ | f64 | .. | -| ✓ | __expx | f80 | ∅ | f80 | .. | -| ✓ | expf128 | f128 | ∅ | f128 | .. | -| ✓ | expq | f128 | ∅ | f128 | .. PPC | -| ✓ | expl |long double| ∅ |long double| .. 
| -| ✓ | __exp2h | f16 | ∅ | f16 | `2^a` | -| ✓ | exp2f | f32 | ∅ | f32 | .. | -| ✓ | exp2 | f64 | ∅ | f64 | .. | -| ✓ | __exp2x | f80 | ∅ | f80 | .. | -| ✓ | exp2f128 | f128 | ∅ | f128 | .. | -| ✓ | exp2q | f128 | ∅ | f128 | .. PPC | -| ✓ | exp2l |long double| ∅ |long double| .. | -| ✓ | __fabsh | f16 | ∅ | f16 | absolute value of a | -| ✓ | fabsf | f32 | ∅ | f32 | .. | -| ✓ | fabs | f64 | ∅ | f64 | .. | -| ✓ | __fabsx | f80 | ∅ | f80 | .. | -| ✓ | fabsf128 | f128 | ∅ | f128 | .. | -| ✓ | fabsq | f128 | ∅ | f128 | .. PPC | -| ✓ | fabsl |long double| ∅ |long double| .. | -| ✓ | __floorh | f16 | ∅ | f16 |largest integer value not greater than a| -| ✓ | floorf | f32 | ∅ | f32 |If a is integer, +-0, +-NaN, or +-infinite, a itself is returned.| -| ✓ | floor | f64 | ∅ | f64 | .. | -| ✓ | __floorx | f80 | ∅ | f80 | .. | -| ✓ | floorf128 | f128 | ∅ | f128 | .. | -| ✓ | floorq | f128 | ∅ | f128 | .. PPC | -| ✓ | floorl |long double| ∅ |long double| .. | -| ✓ | __fmah | f16 | 2xf16 | f16 | args a,b,c result `(a*b)+c`| -| ✓ | fmaf | f32 | 2xf32 | f32 |Fused multiply-add for hardware acceleration| -| ✓ | fma | f64 | 2xf64 | f64 | .. | -| ✓ | __fmax | f80 | 2xf80 | f80 | .. | -| ✓ | fmaf128 | f128 | 2xf128 | f128 | .. | -| ✓ | fmaq | f128 | 2xf128 | f128 | .. PPC | -| ✓ | fmal |long double|2xlong double|long double| .. | -| ✓ | __fmaxh | f16 | f16 | f16 | larger value of a,b | -| ✓ | fmaxf | f32 | f32 | f32 | .. | -| ✓ | fmax | f64 | f64 | f64 | .. | -| ✓ | __fmaxx | f80 | f80 | f80 | .. | -| ✓ | fmaxf128 | f128 | f128 | f128 | .. | -| ✓ | fmaxq | f128 | f128 | f128 | .. PPC | -| ✓ | fmaxl |long double|long double|long double| .. | -| ✓ | __fminh | f16 | f16 | f16 | smaller value of a,b | -| ✓ | fminf | f32 | f32 | f32 | .. | -| ✓ | fmin | f64 | f64 | f64 | .. | -| ✓ | __fminx | f80 | f80 | f80 | .. | -| ✓ | fminf128 | f128 | f128 | f128 | .. | -| ✓ | fminq | f128 | f128 | f128 | .. PPC | -| ✓ | fminl |long double|long double|long double| .. 
| -| ✓ | __fmodh | f16 | f16 | f16 |floating-point remainder of division a/b| -| ✓ | fmodf | f32 | f32 | f32 | .. | -| ✓ | fmod | f64 | f64 | f64 | .. | -| ✓ | __fmodx | f80 | f80 | f80 | .. | -| ✓ | fmodf128 | f128 | f128 | f128 | .. | -| ✓ | fmodq | f128 | f128 | f128 | .. PPC | -| ✓ | fmodl |long double|long double|long double| .. | -| ✓ | __logh | f16 | ∅ | f16 |natural (base-e) logarithm of a| -| ✓ | logf | f32 | ∅ | f32 | .. | -| ✓ | log | f64 | ∅ | f64 | .. | -| ✓ | __logx | f80 | ∅ | f80 | .. | -| ✓ | logf128 | f128 | ∅ | f128 | .. | -| ✓ | logq | f128 | ∅ | f128 | .. PPC | -| ✓ | logl |long double| ∅ |long double| .. | -| ✓ | __log10h | f16 | ∅ | f16 |common (base-10) logarithm of a| -| ✓ | log10f | f32 | ∅ | f32 | .. | -| ✓ | log10 | f64 | ∅ | f64 | .. | -| ✓ | __log10x | f80 | ∅ | f80 | .. | -| ✓ | log10f128 | f128 | ∅ | f128 | .. | -| ✓ | log10q | f128 | ∅ | f128 | .. PPC | -| ✓ | log10l |long double| ∅ |long double| .. | -| ✓ | __log2h | f16 | ∅ | f16 | base-2 logarithm of a | -| ✓ | log2f | f32 | ∅ | f32 | .. | -| ✓ | log2 | f64 | ∅ | f64 | .. | -| ✓ | __log2x | f80 | ∅ | f80 | .. | -| ✓ | log2f128 | f128 | ∅ | f128 | .. | -| ✓ | log2q | f128 | ∅ | f128 | .. PPC | -| ✓ | log2l |long double| ∅ |long double| .. | -| ✓ | __roundh | f16 | ∅ | f16 | a rounded to nearest int, ties away from zero| -| ✓ | roundf | f32 | ∅ | f32 | .. | -| ✓ | round | f64 | ∅ | f64 | .. | -| ✓ | __roundx | f80 | ∅ | f80 | .. | -| ✓ | roundf128 | f128 | ∅ | f128 | .. | -| ✓ | roundq | f128 | ∅ | f128 | .. PPC | -| ✓ | roundl |long double| ∅ |long double| .. | -| ✓ | __sinh | f16 | ∅ | f16 | `sin(a)=(e^(ia)-e^(-ia))/(2i)`| -| ✓ | sinf | f32 | ∅ | f32 | .. | -| ✓ | sin | f64 | ∅ | f64 | .. | -| ✓ | __sinx | f80 | ∅ | f80 | .. | -| ✓ | sinf128 | f128 | ∅ | f128 | .. | -| ✓ | sinq | f128 | ∅ | f128 | .. PPC | -| ✓ | sinl |long double| ∅ |long double| .. 
| -| ✓ | __sincosh | f16 | 2x *f16 | ∅ |sin and cos of the same angle a| -| ✓ | sincosf | f32 | 2x *f32 | ∅ |args a,*b,*c, `b.*=sin(a),c.*=cos(a)`| -| ✓ | sincos | f64 | 2x *f64 | ∅ | .. | -| ✓ | __sincosx | f80 | 2x *f80 | ∅ | .. | -| ✓ | sincosf128 | f128 | 2x *f128 | ∅ | .. | -| ✓ | sincosq | f128 | 2x *f128 | ∅ | .. PPC | -| ✓ | sincosl |long double| 2x *long double|∅ | .. | -| ✓ | __sqrth | f16 | ∅ | f16 | square root of a (find `r st. a=r^2`)| -| ✓ | sqrtf | f32 | ∅ | f32 | .. | -| ✓ | sqrt | f64 | ∅ | f64 | .. | -| ✓ | __sqrtx | f80 | ∅ | f80 | .. | -| ✓ | sqrtf128 | f128 | ∅ | f128 | .. | -| ✓ | sqrtq | f128 | ∅ | f128 | .. PPC | -| ✓ | sqrtl |long double| ∅ |long double| .. | -| ✓ | __tanh | f16 | ∅ | f16 | `tan(x)=sin(x)/cos(x)` | -| ✓ | tanf | f32 | ∅ | f32 | .. | -| ✓ | tan | f64 | ∅ | f64 | .. | -| ✓ | __tanx | f80 | ∅ | f80 | .. | -| ✓ | tanf128 | f128 | ∅ | f128 | .. | -| ✓ | tanq | f128 | ∅ | f128 | .. PPC | -| ✓ | tanl |long double| ∅ |long double| .. | -| ✓ | __trunch | f16 | ∅ | f16 | a rounded to next int towards zero| -| ✓ | truncf | f32 | ∅ | f32 | .. | -| ✓ | trunc | f64 | ∅ | f64 | .. | -| ✓ | __truncx | f80 | ∅ | f80 | .. | -| ✓ | truncf128 | f128 | ∅ | f128 | .. | -| ✓ | truncq | f128 | ∅ | f128 | .. PPC | -| ✓ | truncl |long double| ∅ |long double| .. 
| - -Arbitrary Precision Big Integer (BigInt) library routines - -TODO brief description - -| Done | Name | result| a | b | size| ret | Comment | -| ---- | ------- | ----- | ----- | ----- | --- | ----- |---------------------- | -| | | | | | | |**BigInt Bit Operation**| -| | | | | | | |**BigInt Comparison** | -| | | | | | | |**BigInt Arithmetic** | -|✓|__udivei4 |[*c]u32|[*c]u32|[*c]u32|usize|void | `a / b` | -|✓|__umodei4 |[*c]u32|[*c]u32|[*c]u32|usize|void | `a % b` | -|✗|__divei4 |[*c]u32|[*c]u32|[*c]u32|usize|void | `a / b` | -|✗|__modei4 |[*c]u32|[*c]u32|[*c]u32|usize|void | `a % b` | -| | | | | | | |**BigInt Arithmetic with Trapping Overflow**| -| | | | | | | |**BigInt Arithmetic which Return on Overflow**[^noptr_faster]| - -Further content (conditionally) exported with C abi: - -ARM-only routines - -| Done | Name | a | b | Out | Comment | -| ---- | -------- | --- | --- | -----| ----------------------| -| | | | | | **Float Comparison** | -|✗|__aeabi_cfcmpeq | f32 | f32 | void | `a == b` result in PSR ZC flags[^PSRZC] | -|✗|__aeabi_cfcmple | f32 | f32 | void | `a <= b` result .. | -|✗|__aeabi_cfrcmple| f32 | f32 | void | `b <= a` .. | -|✗|__aeabi_cdcmpeq | f64 | f64 | void | `a == b` .. | -|✗|__aeabi_cdcmple | f64 | f64 | void | `a <= b` .. | -|✗|__aeabi_cdrcmple| f64 | f64 | void | `b <= a` .. | -| | | | | | **Float Arithmetic** | -|✗|__aeabi_frsub | f64 | f64 | f64 | `b - a` | -|✗|__aeabi_drsub | f64 | f64 | f64 | .. | -| | | | | | **Special** | -|✓|__aeabi_read_tp | ∅ | ∅ | *u8 | ret tls pointer | -|✗|__aeabi_idiv0 | i32 | ∅ | i32 | div by 0 modifier | -|✗|__aeabi_ldiv0 | i64 | ∅ | i64 | div by 0 modifier | -| | | | | | **Unaligned memory access** | -|✗|__aeabi_uread4 |[*]u8| ∅ | i32 | ret value read | -|✗|__aeabi_uwrite4 | i32 |[*]u8| i32 | ret value written | -|✗|__aeabi_uread8 |[*]u8| ∅ | i64 | .. | -|✗|__aeabi_uwrite8 | i64 |[*]u8| i64 | .. 
| - - -| Done | Name | a | b | c | Comment | -| ---- | -------- | --- | --- | -----| ----------------------| -| | | | | | **Memory copy, move and set** | -|✓|__aeabi_memcpy8 |[*]u8|[*]u8| usize| *dest, *src, size | -|✓|__aeabi_memcpy4 |[*]u8|[*]u8| usize| .. | -|✓|__aeabi_memcpy |[*]u8|[*]u8| usize| .. | -|✓|__aeabi_memmove8|[*]u8|[*]u8| usize| *dest, *src, size | -|✓|__aeabi_memmove4|[*]u8|[*]u8| usize| .. | -|✓|__aeabi_memmove |[*]u8|[*]u8| usize| .. | -|✓|__aeabi_memset8 |[*]u8|usize| i32 | *dest, size, char | -|✓|__aeabi_memset4 |[*]u8|usize| i32 | .. | -|✓|__aeabi_memset |[*]u8|usize| i32 | .. | -|✓|__aeabi_memclr8 |[*]u8| u32 | usize| *dest, size | -|✓|__aeabi_memclr4 |[*]u8| u32 | usize| .. | -|✓|__aeabi_memclr |[*]u8| u32 | usize| .. | -|✓|__aeabi_uwrite8 | i64 |[*]u8| i64 | .. | - -- __aeabi_read_tp - -[^PSRZC]: return result in the CPSR Z and C flag. C is clear only if the -operands are ordered and the first operand is less than the second. -Z is set only when the operands are ordered and equal. -Preserves all core registers except ip, lr, and the CPSR. 
- -- aarch64 outline atomics -- atomics -- bcmp -- clear cache -- memory routines (memcmp, memcpy, memset, memmove) -- msvc things like _alldiv, _aulldiv, _allrem -- objective-c __isPlatformVersionAtLeast check -- stack probe routines -- tls emulation diff --git a/lib/compiler_rt/arm.zig b/lib/compiler_rt/arm.zig index 94cd4feb8c..b358fbfa80 100644 --- a/lib/compiler_rt/arm.zig +++ b/lib/compiler_rt/arm.zig @@ -51,7 +51,7 @@ const __udivmodsi4 = @import("int.zig").__udivmodsi4; const __divmoddi4 = @import("int.zig").__divmoddi4; const __udivmoddi4 = @import("int.zig").__udivmoddi4; -extern fn memset(dest: ?[*]u8, c: u8, n: usize) ?[*]u8; +extern fn memset(dest: ?[*]u8, c: i32, n: usize) ?[*]u8; extern fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, n: usize) ?[*]u8; extern fn memmove(dest: ?[*]u8, src: ?[*]const u8, n: usize) ?[*]u8; @@ -81,17 +81,17 @@ pub fn __aeabi_memmove8(dest: [*]u8, src: [*]u8, n: usize) callconv(.AAPCS) void _ = memmove(dest, src, n); } -pub fn __aeabi_memset(dest: [*]u8, n: usize, c: u8) callconv(.AAPCS) void { +pub fn __aeabi_memset(dest: [*]u8, n: usize, c: i32) callconv(.AAPCS) void { @setRuntimeSafety(false); // This is dentical to the standard `memset` definition but with the last // two arguments swapped _ = memset(dest, c, n); } -pub fn __aeabi_memset4(dest: [*]u8, n: usize, c: u8) callconv(.AAPCS) void { +pub fn __aeabi_memset4(dest: [*]u8, n: usize, c: i32) callconv(.AAPCS) void { @setRuntimeSafety(false); _ = memset(dest, c, n); } -pub fn __aeabi_memset8(dest: [*]u8, n: usize, c: u8) callconv(.AAPCS) void { +pub fn __aeabi_memset8(dest: [*]u8, n: usize, c: i32) callconv(.AAPCS) void { @setRuntimeSafety(false); _ = memset(dest, c, n); } diff --git a/lib/compiler_rt/clear_cache.zig b/lib/compiler_rt/clear_cache.zig index 93e6846ae5..5038c4061a 100644 --- a/lib/compiler_rt/clear_cache.zig +++ b/lib/compiler_rt/clear_cache.zig @@ -12,7 +12,7 @@ pub const panic = @import("common.zig").panic; // specified range. 
comptime { - _ = clear_cache; + _ = &clear_cache; } fn clear_cache(start: usize, end: usize) callconv(.C) void { diff --git a/lib/compiler_rt/divc3.zig b/lib/compiler_rt/divc3.zig index 4e4dba2856..c4241c1483 100644 --- a/lib/compiler_rt/divc3.zig +++ b/lib/compiler_rt/divc3.zig @@ -3,7 +3,6 @@ const isNan = std.math.isNan; const isInf = std.math.isInf; const scalbn = std.math.scalbn; const ilogb = std.math.ilogb; -const max = std.math.max; const fabs = std.math.fabs; const maxInt = std.math.maxInt; const minInt = std.math.minInt; @@ -17,7 +16,7 @@ pub inline fn divc3(comptime T: type, a: T, b: T, c_in: T, d_in: T) Complex(T) { var d = d_in; // logbw used to prevent under/over-flow - const logbw = ilogb(max(fabs(c), fabs(d))); + const logbw = ilogb(@max(fabs(c), fabs(d))); const logbw_finite = logbw != maxInt(i32) and logbw != minInt(i32); const ilogbw = if (logbw_finite) b: { c = scalbn(c, -logbw); diff --git a/lib/compiler_rt/divtc3.zig b/lib/compiler_rt/divtc3.zig index d5b1d12059..592681607d 100644 --- a/lib/compiler_rt/divtc3.zig +++ b/lib/compiler_rt/divtc3.zig @@ -4,7 +4,11 @@ const Complex = @import("./mulc3.zig").Complex; comptime { if (@import("builtin").zig_backend != .stage2_c) { - @export(__divtc3, .{ .name = "__divtc3", .linkage = common.linkage, .visibility = common.visibility }); + if (common.want_ppc_abi) { + @export(__divtc3, .{ .name = "__divkc3", .linkage = common.linkage, .visibility = common.visibility }); + } else { + @export(__divtc3, .{ .name = "__divtc3", .linkage = common.linkage, .visibility = common.visibility }); + } } } diff --git a/lib/compiler_rt/emutls.zig b/lib/compiler_rt/emutls.zig index 05a2de97a8..47c71efadd 100644 --- a/lib/compiler_rt/emutls.zig +++ b/lib/compiler_rt/emutls.zig @@ -49,7 +49,7 @@ const simple_allocator = struct { /// Allocate a memory chunk. 
pub fn advancedAlloc(alignment: u29, size: usize) [*]u8 { - const minimal_alignment = std.math.max(@alignOf(usize), alignment); + const minimal_alignment = @max(@alignOf(usize), alignment); var aligned_ptr: ?*anyopaque = undefined; if (std.c.posix_memalign(&aligned_ptr, minimal_alignment, size) != 0) { @@ -170,7 +170,7 @@ const current_thread_storage = struct { // make it to contains at least 16 objects (to avoid too much // reallocation at startup). - const size = std.math.max(16, index); + const size = @max(16, index); // create a new array and store it. var array: *ObjectArray = ObjectArray.init(size); diff --git a/lib/compiler_rt/fixtfti.zig b/lib/compiler_rt/fixtfti.zig index 03d861f92e..957df1dd5e 100644 --- a/lib/compiler_rt/fixtfti.zig +++ b/lib/compiler_rt/fixtfti.zig @@ -7,6 +7,8 @@ pub const panic = common.panic; comptime { if (common.want_windows_v2u64_abi) { @export(__fixtfti_windows_x86_64, .{ .name = "__fixtfti", .linkage = common.linkage, .visibility = common.visibility }); + } else if (common.want_ppc_abi) { + @export(__fixtfti, .{ .name = "__fixkfti", .linkage = common.linkage, .visibility = common.visibility }); } else { @export(__fixtfti, .{ .name = "__fixtfti", .linkage = common.linkage, .visibility = common.visibility }); } diff --git a/lib/compiler_rt/fixunstfti.zig b/lib/compiler_rt/fixunstfti.zig index 72b529e0c9..b77bbb6689 100644 --- a/lib/compiler_rt/fixunstfti.zig +++ b/lib/compiler_rt/fixunstfti.zig @@ -7,6 +7,8 @@ pub const panic = common.panic; comptime { if (common.want_windows_v2u64_abi) { @export(__fixunstfti_windows_x86_64, .{ .name = "__fixunstfti", .linkage = common.linkage, .visibility = common.visibility }); + } else if (common.want_ppc_abi) { + @export(__fixunstfti, .{ .name = "__fixunskfti", .linkage = common.linkage, .visibility = common.visibility }); } else { @export(__fixunstfti, .{ .name = "__fixunstfti", .linkage = common.linkage, .visibility = common.visibility }); } diff --git a/lib/compiler_rt/floattitf.zig 
b/lib/compiler_rt/floattitf.zig index 62c215c986..32cc8d49b5 100644 --- a/lib/compiler_rt/floattitf.zig +++ b/lib/compiler_rt/floattitf.zig @@ -7,6 +7,8 @@ pub const panic = common.panic; comptime { if (common.want_windows_v2u64_abi) { @export(__floattitf_windows_x86_64, .{ .name = "__floattitf", .linkage = common.linkage, .visibility = common.visibility }); + } else if (common.want_ppc_abi) { + @export(__floattitf, .{ .name = "__floattikf", .linkage = common.linkage, .visibility = common.visibility }); } else { @export(__floattitf, .{ .name = "__floattitf", .linkage = common.linkage, .visibility = common.visibility }); } diff --git a/lib/compiler_rt/multc3.zig b/lib/compiler_rt/multc3.zig index 89054e4af8..cb2ea8f106 100644 --- a/lib/compiler_rt/multc3.zig +++ b/lib/compiler_rt/multc3.zig @@ -5,7 +5,11 @@ pub const panic = common.panic; comptime { if (@import("builtin").zig_backend != .stage2_c) { - @export(__multc3, .{ .name = "__multc3", .linkage = common.linkage, .visibility = common.visibility }); + if (common.want_ppc_abi) { + @export(__multc3, .{ .name = "__mulkc3", .linkage = common.linkage, .visibility = common.visibility }); + } else { + @export(__multc3, .{ .name = "__multc3", .linkage = common.linkage, .visibility = common.visibility }); + } } } diff --git a/lib/compiler_rt/negtf2.zig b/lib/compiler_rt/negtf2.zig index 46d498ab97..6332d44ccf 100644 --- a/lib/compiler_rt/negtf2.zig +++ b/lib/compiler_rt/negtf2.zig @@ -3,7 +3,11 @@ const common = @import("./common.zig"); pub const panic = common.panic; comptime { - @export(__negtf2, .{ .name = "__negtf2", .linkage = common.linkage, .visibility = common.visibility }); + if (common.want_ppc_abi) { + @export(__negtf2, .{ .name = "__negkf2", .linkage = common.linkage, .visibility = common.visibility }); + } else { + @export(__negtf2, .{ .name = "__negtf2", .linkage = common.linkage, .visibility = common.visibility }); + } } fn __negtf2(a: f128) callconv(.C) f128 { diff --git a/lib/compiler_rt/powiXf2.zig 
b/lib/compiler_rt/powiXf2.zig index 581dd4a909..6cb80c959e 100644 --- a/lib/compiler_rt/powiXf2.zig +++ b/lib/compiler_rt/powiXf2.zig @@ -13,7 +13,11 @@ comptime { @export(__powihf2, .{ .name = "__powihf2", .linkage = common.linkage, .visibility = common.visibility }); @export(__powisf2, .{ .name = "__powisf2", .linkage = common.linkage, .visibility = common.visibility }); @export(__powidf2, .{ .name = "__powidf2", .linkage = common.linkage, .visibility = common.visibility }); - @export(__powitf2, .{ .name = "__powitf2", .linkage = common.linkage, .visibility = common.visibility }); + if (common.want_ppc_abi) { + @export(__powitf2, .{ .name = "__powikf2", .linkage = common.linkage, .visibility = common.visibility }); + } else { + @export(__powitf2, .{ .name = "__powitf2", .linkage = common.linkage, .visibility = common.visibility }); + } @export(__powixf2, .{ .name = "__powixf2", .linkage = common.linkage, .visibility = common.visibility }); } diff --git a/lib/compiler_rt/powiXf2_test.zig b/lib/compiler_rt/powiXf2_test.zig index b1f9d2b538..5f7828c3e3 100644 --- a/lib/compiler_rt/powiXf2_test.zig +++ b/lib/compiler_rt/powiXf2_test.zig @@ -32,17 +32,18 @@ fn test__powixf2(a: f80, b: i32, expected: f80) !void { } test "powihf2" { + const inf_f16 = math.inf(f16); try test__powisf2(0, 0, 1); try test__powihf2(1, 0, 1); try test__powihf2(1.5, 0, 1); try test__powihf2(2, 0, 1); - try test__powihf2(math.inf_f16, 0, 1); + try test__powihf2(inf_f16, 0, 1); try test__powihf2(-0.0, 0, 1); try test__powihf2(-1, 0, 1); try test__powihf2(-1.5, 0, 1); try test__powihf2(-2, 0, 1); - try test__powihf2(-math.inf_f16, 0, 1); + try test__powihf2(-inf_f16, 0, 1); try test__powihf2(0, 1, 0); try test__powihf2(0, 2, 0); @@ -65,35 +66,35 @@ test "powihf2" { try test__powihf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFE)), 1); try test__powihf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFF)), 1); - try test__powihf2(math.inf_f16, 1, math.inf_f16); - try test__powihf2(math.inf_f16, 2, math.inf_f16); - try 
test__powihf2(math.inf_f16, 3, math.inf_f16); - try test__powihf2(math.inf_f16, 4, math.inf_f16); - try test__powihf2(math.inf_f16, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f16); - try test__powihf2(math.inf_f16, @bitCast(i32, @as(u32, 0x7FFFFFFF)), math.inf_f16); - - try test__powihf2(-math.inf_f16, 1, -math.inf_f16); - try test__powihf2(-math.inf_f16, 2, math.inf_f16); - try test__powihf2(-math.inf_f16, 3, -math.inf_f16); - try test__powihf2(-math.inf_f16, 4, math.inf_f16); - try test__powihf2(-math.inf_f16, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f16); - try test__powihf2(-math.inf_f16, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -math.inf_f16); + try test__powihf2(inf_f16, 1, inf_f16); + try test__powihf2(inf_f16, 2, inf_f16); + try test__powihf2(inf_f16, 3, inf_f16); + try test__powihf2(inf_f16, 4, inf_f16); + try test__powihf2(inf_f16, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f16); + try test__powihf2(inf_f16, @bitCast(i32, @as(u32, 0x7FFFFFFF)), inf_f16); + + try test__powihf2(-inf_f16, 1, -inf_f16); + try test__powihf2(-inf_f16, 2, inf_f16); + try test__powihf2(-inf_f16, 3, -inf_f16); + try test__powihf2(-inf_f16, 4, inf_f16); + try test__powihf2(-inf_f16, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f16); + try test__powihf2(-inf_f16, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -inf_f16); // - try test__powihf2(0, -1, math.inf_f16); - try test__powihf2(0, -2, math.inf_f16); - try test__powihf2(0, -3, math.inf_f16); - try test__powihf2(0, -4, math.inf_f16); - try test__powihf2(0, @bitCast(i32, @as(u32, 0x80000002)), math.inf_f16); // 0 ^ anything = +inf - try test__powihf2(0, @bitCast(i32, @as(u32, 0x80000001)), math.inf_f16); - try test__powihf2(0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f16); - - try test__powihf2(-0.0, -1, -math.inf_f16); - try test__powihf2(-0.0, -2, math.inf_f16); - try test__powihf2(-0.0, -3, -math.inf_f16); - try test__powihf2(-0.0, -4, math.inf_f16); - try test__powihf2(-0.0, @bitCast(i32, @as(u32, 0x80000002)), math.inf_f16); // -0 
^ anything even = +inf - try test__powihf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -math.inf_f16); // -0 ^ anything odd = -inf - try test__powihf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f16); + try test__powihf2(0, -1, inf_f16); + try test__powihf2(0, -2, inf_f16); + try test__powihf2(0, -3, inf_f16); + try test__powihf2(0, -4, inf_f16); + try test__powihf2(0, @bitCast(i32, @as(u32, 0x80000002)), inf_f16); // 0 ^ anything = +inf + try test__powihf2(0, @bitCast(i32, @as(u32, 0x80000001)), inf_f16); + try test__powihf2(0, @bitCast(i32, @as(u32, 0x80000000)), inf_f16); + + try test__powihf2(-0.0, -1, -inf_f16); + try test__powihf2(-0.0, -2, inf_f16); + try test__powihf2(-0.0, -3, -inf_f16); + try test__powihf2(-0.0, -4, inf_f16); + try test__powihf2(-0.0, @bitCast(i32, @as(u32, 0x80000002)), inf_f16); // -0 ^ anything even = +inf + try test__powihf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -inf_f16); // -0 ^ anything odd = -inf + try test__powihf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), inf_f16); try test__powihf2(1, -1, 1); try test__powihf2(1, -2, 1); @@ -103,21 +104,21 @@ test "powihf2" { try test__powihf2(1, @bitCast(i32, @as(u32, 0x80000001)), 1); try test__powihf2(1, @bitCast(i32, @as(u32, 0x80000000)), 1); - try test__powihf2(math.inf_f16, -1, 0); - try test__powihf2(math.inf_f16, -2, 0); - try test__powihf2(math.inf_f16, -3, 0); - try test__powihf2(math.inf_f16, -4, 0); - try test__powihf2(math.inf_f16, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powihf2(math.inf_f16, @bitCast(i32, @as(u32, 0x80000001)), 0); - try test__powihf2(math.inf_f16, @bitCast(i32, @as(u32, 0x80000000)), 0); + try test__powihf2(inf_f16, -1, 0); + try test__powihf2(inf_f16, -2, 0); + try test__powihf2(inf_f16, -3, 0); + try test__powihf2(inf_f16, -4, 0); + try test__powihf2(inf_f16, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powihf2(inf_f16, @bitCast(i32, @as(u32, 0x80000001)), 0); + try test__powihf2(inf_f16, @bitCast(i32, @as(u32, 0x80000000)), 
0); // - try test__powihf2(-math.inf_f16, -1, -0.0); - try test__powihf2(-math.inf_f16, -2, 0); - try test__powihf2(-math.inf_f16, -3, -0.0); - try test__powihf2(-math.inf_f16, -4, 0); - try test__powihf2(-math.inf_f16, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powihf2(-math.inf_f16, @bitCast(i32, @as(u32, 0x80000001)), -0.0); - try test__powihf2(-math.inf_f16, @bitCast(i32, @as(u32, 0x80000000)), 0); + try test__powihf2(-inf_f16, -1, -0.0); + try test__powihf2(-inf_f16, -2, 0); + try test__powihf2(-inf_f16, -3, -0.0); + try test__powihf2(-inf_f16, -4, 0); + try test__powihf2(-inf_f16, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powihf2(-inf_f16, @bitCast(i32, @as(u32, 0x80000001)), -0.0); + try test__powihf2(-inf_f16, @bitCast(i32, @as(u32, 0x80000000)), 0); try test__powihf2(2, 10, 1024.0); try test__powihf2(-2, 10, 1024.0); @@ -128,8 +129,8 @@ test "powihf2" { try test__powihf2(-2, 14, 16384.0); try test__powihf2(2, 15, 32768.0); try test__powihf2(-2, 15, -32768.0); - try test__powihf2(2, 16, math.inf_f16); - try test__powihf2(-2, 16, math.inf_f16); + try test__powihf2(2, 16, inf_f16); + try test__powihf2(-2, 16, inf_f16); try test__powihf2(2, -13, 1.0 / 8192.0); try test__powihf2(-2, -13, -1.0 / 8192.0); @@ -140,17 +141,18 @@ test "powihf2" { } test "powisf2" { + const inf_f32 = math.inf(f32); try test__powisf2(0, 0, 1); try test__powisf2(1, 0, 1); try test__powisf2(1.5, 0, 1); try test__powisf2(2, 0, 1); - try test__powisf2(math.inf_f32, 0, 1); + try test__powisf2(inf_f32, 0, 1); try test__powisf2(-0.0, 0, 1); try test__powisf2(-1, 0, 1); try test__powisf2(-1.5, 0, 1); try test__powisf2(-2, 0, 1); - try test__powisf2(-math.inf_f32, 0, 1); + try test__powisf2(-inf_f32, 0, 1); try test__powisf2(0, 1, 0); try test__powisf2(0, 2, 0); @@ -173,35 +175,35 @@ test "powisf2" { try test__powisf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFE)), 1); try test__powisf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFF)), 1); - try test__powisf2(math.inf_f32, 1, math.inf_f32); 
- try test__powisf2(math.inf_f32, 2, math.inf_f32); - try test__powisf2(math.inf_f32, 3, math.inf_f32); - try test__powisf2(math.inf_f32, 4, math.inf_f32); - try test__powisf2(math.inf_f32, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f32); - try test__powisf2(math.inf_f32, @bitCast(i32, @as(u32, 0x7FFFFFFF)), math.inf_f32); - - try test__powisf2(-math.inf_f32, 1, -math.inf_f32); - try test__powisf2(-math.inf_f32, 2, math.inf_f32); - try test__powisf2(-math.inf_f32, 3, -math.inf_f32); - try test__powisf2(-math.inf_f32, 4, math.inf_f32); - try test__powisf2(-math.inf_f32, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f32); - try test__powisf2(-math.inf_f32, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -math.inf_f32); - - try test__powisf2(0, -1, math.inf_f32); - try test__powisf2(0, -2, math.inf_f32); - try test__powisf2(0, -3, math.inf_f32); - try test__powisf2(0, -4, math.inf_f32); - try test__powisf2(0, @bitCast(i32, @as(u32, 0x80000002)), math.inf_f32); - try test__powisf2(0, @bitCast(i32, @as(u32, 0x80000001)), math.inf_f32); - try test__powisf2(0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f32); - - try test__powisf2(-0.0, -1, -math.inf_f32); - try test__powisf2(-0.0, -2, math.inf_f32); - try test__powisf2(-0.0, -3, -math.inf_f32); - try test__powisf2(-0.0, -4, math.inf_f32); - try test__powisf2(-0.0, @bitCast(i32, @as(u32, 0x80000002)), math.inf_f32); - try test__powisf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -math.inf_f32); - try test__powisf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f32); + try test__powisf2(inf_f32, 1, inf_f32); + try test__powisf2(inf_f32, 2, inf_f32); + try test__powisf2(inf_f32, 3, inf_f32); + try test__powisf2(inf_f32, 4, inf_f32); + try test__powisf2(inf_f32, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f32); + try test__powisf2(inf_f32, @bitCast(i32, @as(u32, 0x7FFFFFFF)), inf_f32); + + try test__powisf2(-inf_f32, 1, -inf_f32); + try test__powisf2(-inf_f32, 2, inf_f32); + try test__powisf2(-inf_f32, 3, -inf_f32); + try 
test__powisf2(-inf_f32, 4, inf_f32); + try test__powisf2(-inf_f32, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f32); + try test__powisf2(-inf_f32, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -inf_f32); + + try test__powisf2(0, -1, inf_f32); + try test__powisf2(0, -2, inf_f32); + try test__powisf2(0, -3, inf_f32); + try test__powisf2(0, -4, inf_f32); + try test__powisf2(0, @bitCast(i32, @as(u32, 0x80000002)), inf_f32); + try test__powisf2(0, @bitCast(i32, @as(u32, 0x80000001)), inf_f32); + try test__powisf2(0, @bitCast(i32, @as(u32, 0x80000000)), inf_f32); + + try test__powisf2(-0.0, -1, -inf_f32); + try test__powisf2(-0.0, -2, inf_f32); + try test__powisf2(-0.0, -3, -inf_f32); + try test__powisf2(-0.0, -4, inf_f32); + try test__powisf2(-0.0, @bitCast(i32, @as(u32, 0x80000002)), inf_f32); + try test__powisf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -inf_f32); + try test__powisf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), inf_f32); try test__powisf2(1, -1, 1); try test__powisf2(1, -2, 1); @@ -211,21 +213,21 @@ test "powisf2" { try test__powisf2(1, @bitCast(i32, @as(u32, 0x80000001)), 1); try test__powisf2(1, @bitCast(i32, @as(u32, 0x80000000)), 1); - try test__powisf2(math.inf_f32, -1, 0); - try test__powisf2(math.inf_f32, -2, 0); - try test__powisf2(math.inf_f32, -3, 0); - try test__powisf2(math.inf_f32, -4, 0); - try test__powisf2(math.inf_f32, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powisf2(math.inf_f32, @bitCast(i32, @as(u32, 0x80000001)), 0); - try test__powisf2(math.inf_f32, @bitCast(i32, @as(u32, 0x80000000)), 0); - - try test__powisf2(-math.inf_f32, -1, -0.0); - try test__powisf2(-math.inf_f32, -2, 0); - try test__powisf2(-math.inf_f32, -3, -0.0); - try test__powisf2(-math.inf_f32, -4, 0); - try test__powisf2(-math.inf_f32, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powisf2(-math.inf_f32, @bitCast(i32, @as(u32, 0x80000001)), -0.0); - try test__powisf2(-math.inf_f32, @bitCast(i32, @as(u32, 0x80000000)), 0); + try test__powisf2(inf_f32, -1, 0); + 
try test__powisf2(inf_f32, -2, 0); + try test__powisf2(inf_f32, -3, 0); + try test__powisf2(inf_f32, -4, 0); + try test__powisf2(inf_f32, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powisf2(inf_f32, @bitCast(i32, @as(u32, 0x80000001)), 0); + try test__powisf2(inf_f32, @bitCast(i32, @as(u32, 0x80000000)), 0); + + try test__powisf2(-inf_f32, -1, -0.0); + try test__powisf2(-inf_f32, -2, 0); + try test__powisf2(-inf_f32, -3, -0.0); + try test__powisf2(-inf_f32, -4, 0); + try test__powisf2(-inf_f32, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powisf2(-inf_f32, @bitCast(i32, @as(u32, 0x80000001)), -0.0); + try test__powisf2(-inf_f32, @bitCast(i32, @as(u32, 0x80000000)), 0); try test__powisf2(2.0, 10, 1024.0); try test__powisf2(-2, 10, 1024.0); @@ -244,17 +246,18 @@ test "powisf2" { } test "powidf2" { + const inf_f64 = math.inf(f64); try test__powidf2(0, 0, 1); try test__powidf2(1, 0, 1); try test__powidf2(1.5, 0, 1); try test__powidf2(2, 0, 1); - try test__powidf2(math.inf_f64, 0, 1); + try test__powidf2(inf_f64, 0, 1); try test__powidf2(-0.0, 0, 1); try test__powidf2(-1, 0, 1); try test__powidf2(-1.5, 0, 1); try test__powidf2(-2, 0, 1); - try test__powidf2(-math.inf_f64, 0, 1); + try test__powidf2(-inf_f64, 0, 1); try test__powidf2(0, 1, 0); try test__powidf2(0, 2, 0); @@ -277,35 +280,35 @@ test "powidf2" { try test__powidf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFE)), 1); try test__powidf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFF)), 1); - try test__powidf2(math.inf_f64, 1, math.inf_f64); - try test__powidf2(math.inf_f64, 2, math.inf_f64); - try test__powidf2(math.inf_f64, 3, math.inf_f64); - try test__powidf2(math.inf_f64, 4, math.inf_f64); - try test__powidf2(math.inf_f64, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f64); - try test__powidf2(math.inf_f64, @bitCast(i32, @as(u32, 0x7FFFFFFF)), math.inf_f64); - - try test__powidf2(-math.inf_f64, 1, -math.inf_f64); - try test__powidf2(-math.inf_f64, 2, math.inf_f64); - try test__powidf2(-math.inf_f64, 3, 
-math.inf_f64); - try test__powidf2(-math.inf_f64, 4, math.inf_f64); - try test__powidf2(-math.inf_f64, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f64); - try test__powidf2(-math.inf_f64, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -math.inf_f64); - - try test__powidf2(0, -1, math.inf_f64); - try test__powidf2(0, -2, math.inf_f64); - try test__powidf2(0, -3, math.inf_f64); - try test__powidf2(0, -4, math.inf_f64); - try test__powidf2(0, @bitCast(i32, @as(u32, 0x80000002)), math.inf_f64); - try test__powidf2(0, @bitCast(i32, @as(u32, 0x80000001)), math.inf_f64); - try test__powidf2(0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f64); - - try test__powidf2(-0.0, -1, -math.inf_f64); - try test__powidf2(-0.0, -2, math.inf_f64); - try test__powidf2(-0.0, -3, -math.inf_f64); - try test__powidf2(-0.0, -4, math.inf_f64); - try test__powidf2(-0.0, @bitCast(i32, @as(u32, 0x80000002)), math.inf_f64); - try test__powidf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -math.inf_f64); - try test__powidf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f64); + try test__powidf2(inf_f64, 1, inf_f64); + try test__powidf2(inf_f64, 2, inf_f64); + try test__powidf2(inf_f64, 3, inf_f64); + try test__powidf2(inf_f64, 4, inf_f64); + try test__powidf2(inf_f64, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f64); + try test__powidf2(inf_f64, @bitCast(i32, @as(u32, 0x7FFFFFFF)), inf_f64); + + try test__powidf2(-inf_f64, 1, -inf_f64); + try test__powidf2(-inf_f64, 2, inf_f64); + try test__powidf2(-inf_f64, 3, -inf_f64); + try test__powidf2(-inf_f64, 4, inf_f64); + try test__powidf2(-inf_f64, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f64); + try test__powidf2(-inf_f64, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -inf_f64); + + try test__powidf2(0, -1, inf_f64); + try test__powidf2(0, -2, inf_f64); + try test__powidf2(0, -3, inf_f64); + try test__powidf2(0, -4, inf_f64); + try test__powidf2(0, @bitCast(i32, @as(u32, 0x80000002)), inf_f64); + try test__powidf2(0, @bitCast(i32, @as(u32, 0x80000001)), inf_f64); 
+ try test__powidf2(0, @bitCast(i32, @as(u32, 0x80000000)), inf_f64); + + try test__powidf2(-0.0, -1, -inf_f64); + try test__powidf2(-0.0, -2, inf_f64); + try test__powidf2(-0.0, -3, -inf_f64); + try test__powidf2(-0.0, -4, inf_f64); + try test__powidf2(-0.0, @bitCast(i32, @as(u32, 0x80000002)), inf_f64); + try test__powidf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -inf_f64); + try test__powidf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), inf_f64); try test__powidf2(1, -1, 1); try test__powidf2(1, -2, 1); @@ -315,21 +318,21 @@ test "powidf2" { try test__powidf2(1, @bitCast(i32, @as(u32, 0x80000001)), 1); try test__powidf2(1, @bitCast(i32, @as(u32, 0x80000000)), 1); - try test__powidf2(math.inf_f64, -1, 0); - try test__powidf2(math.inf_f64, -2, 0); - try test__powidf2(math.inf_f64, -3, 0); - try test__powidf2(math.inf_f64, -4, 0); - try test__powidf2(math.inf_f64, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powidf2(math.inf_f64, @bitCast(i32, @as(u32, 0x80000001)), 0); - try test__powidf2(math.inf_f64, @bitCast(i32, @as(u32, 0x80000000)), 0); - - try test__powidf2(-math.inf_f64, -1, -0.0); - try test__powidf2(-math.inf_f64, -2, 0); - try test__powidf2(-math.inf_f64, -3, -0.0); - try test__powidf2(-math.inf_f64, -4, 0); - try test__powidf2(-math.inf_f64, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powidf2(-math.inf_f64, @bitCast(i32, @as(u32, 0x80000001)), -0.0); - try test__powidf2(-math.inf_f64, @bitCast(i32, @as(u32, 0x80000000)), 0); + try test__powidf2(inf_f64, -1, 0); + try test__powidf2(inf_f64, -2, 0); + try test__powidf2(inf_f64, -3, 0); + try test__powidf2(inf_f64, -4, 0); + try test__powidf2(inf_f64, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powidf2(inf_f64, @bitCast(i32, @as(u32, 0x80000001)), 0); + try test__powidf2(inf_f64, @bitCast(i32, @as(u32, 0x80000000)), 0); + + try test__powidf2(-inf_f64, -1, -0.0); + try test__powidf2(-inf_f64, -2, 0); + try test__powidf2(-inf_f64, -3, -0.0); + try test__powidf2(-inf_f64, -4, 0); + 
try test__powidf2(-inf_f64, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powidf2(-inf_f64, @bitCast(i32, @as(u32, 0x80000001)), -0.0); + try test__powidf2(-inf_f64, @bitCast(i32, @as(u32, 0x80000000)), 0); try test__powidf2(2, 10, 1024.0); try test__powidf2(-2, 10, 1024.0); @@ -348,17 +351,18 @@ test "powidf2" { } test "powitf2" { + const inf_f128 = math.inf(f128); try test__powitf2(0, 0, 1); try test__powitf2(1, 0, 1); try test__powitf2(1.5, 0, 1); try test__powitf2(2, 0, 1); - try test__powitf2(math.inf_f128, 0, 1); + try test__powitf2(inf_f128, 0, 1); try test__powitf2(-0.0, 0, 1); try test__powitf2(-1, 0, 1); try test__powitf2(-1.5, 0, 1); try test__powitf2(-2, 0, 1); - try test__powitf2(-math.inf_f128, 0, 1); + try test__powitf2(-inf_f128, 0, 1); try test__powitf2(0, 1, 0); try test__powitf2(0, 2, 0); @@ -381,35 +385,35 @@ test "powitf2" { try test__powitf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFE)), 1); try test__powitf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFF)), 1); - try test__powitf2(math.inf_f128, 1, math.inf_f128); - try test__powitf2(math.inf_f128, 2, math.inf_f128); - try test__powitf2(math.inf_f128, 3, math.inf_f128); - try test__powitf2(math.inf_f128, 4, math.inf_f128); - try test__powitf2(math.inf_f128, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f128); - try test__powitf2(math.inf_f128, @bitCast(i32, @as(u32, 0x7FFFFFFF)), math.inf_f128); - - try test__powitf2(-math.inf_f128, 1, -math.inf_f128); - try test__powitf2(-math.inf_f128, 2, math.inf_f128); - try test__powitf2(-math.inf_f128, 3, -math.inf_f128); - try test__powitf2(-math.inf_f128, 4, math.inf_f128); - try test__powitf2(-math.inf_f128, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f128); - try test__powitf2(-math.inf_f128, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -math.inf_f128); - - try test__powitf2(0, -1, math.inf_f128); - try test__powitf2(0, -2, math.inf_f128); - try test__powitf2(0, -3, math.inf_f128); - try test__powitf2(0, -4, math.inf_f128); - try test__powitf2(0, @bitCast(i32, 
@as(u32, 0x80000002)), math.inf_f128); - try test__powitf2(0, @bitCast(i32, @as(u32, 0x80000001)), math.inf_f128); - try test__powitf2(0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f128); - - try test__powitf2(-0.0, -1, -math.inf_f128); - try test__powitf2(-0.0, -2, math.inf_f128); - try test__powitf2(-0.0, -3, -math.inf_f128); - try test__powitf2(-0.0, -4, math.inf_f128); - try test__powitf2(-0.0, @bitCast(i32, @as(u32, 0x80000002)), math.inf_f128); - try test__powitf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -math.inf_f128); - try test__powitf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f128); + try test__powitf2(inf_f128, 1, inf_f128); + try test__powitf2(inf_f128, 2, inf_f128); + try test__powitf2(inf_f128, 3, inf_f128); + try test__powitf2(inf_f128, 4, inf_f128); + try test__powitf2(inf_f128, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f128); + try test__powitf2(inf_f128, @bitCast(i32, @as(u32, 0x7FFFFFFF)), inf_f128); + + try test__powitf2(-inf_f128, 1, -inf_f128); + try test__powitf2(-inf_f128, 2, inf_f128); + try test__powitf2(-inf_f128, 3, -inf_f128); + try test__powitf2(-inf_f128, 4, inf_f128); + try test__powitf2(-inf_f128, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f128); + try test__powitf2(-inf_f128, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -inf_f128); + + try test__powitf2(0, -1, inf_f128); + try test__powitf2(0, -2, inf_f128); + try test__powitf2(0, -3, inf_f128); + try test__powitf2(0, -4, inf_f128); + try test__powitf2(0, @bitCast(i32, @as(u32, 0x80000002)), inf_f128); + try test__powitf2(0, @bitCast(i32, @as(u32, 0x80000001)), inf_f128); + try test__powitf2(0, @bitCast(i32, @as(u32, 0x80000000)), inf_f128); + + try test__powitf2(-0.0, -1, -inf_f128); + try test__powitf2(-0.0, -2, inf_f128); + try test__powitf2(-0.0, -3, -inf_f128); + try test__powitf2(-0.0, -4, inf_f128); + try test__powitf2(-0.0, @bitCast(i32, @as(u32, 0x80000002)), inf_f128); + try test__powitf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -inf_f128); + try 
test__powitf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), inf_f128); try test__powitf2(1, -1, 1); try test__powitf2(1, -2, 1); @@ -419,21 +423,21 @@ test "powitf2" { try test__powitf2(1, @bitCast(i32, @as(u32, 0x80000001)), 1); try test__powitf2(1, @bitCast(i32, @as(u32, 0x80000000)), 1); - try test__powitf2(math.inf_f128, -1, 0); - try test__powitf2(math.inf_f128, -2, 0); - try test__powitf2(math.inf_f128, -3, 0); - try test__powitf2(math.inf_f128, -4, 0); - try test__powitf2(math.inf_f128, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powitf2(math.inf_f128, @bitCast(i32, @as(u32, 0x80000001)), 0); - try test__powitf2(math.inf_f128, @bitCast(i32, @as(u32, 0x80000000)), 0); - - try test__powitf2(-math.inf_f128, -1, -0.0); - try test__powitf2(-math.inf_f128, -2, 0); - try test__powitf2(-math.inf_f128, -3, -0.0); - try test__powitf2(-math.inf_f128, -4, 0); - try test__powitf2(-math.inf_f128, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powitf2(-math.inf_f128, @bitCast(i32, @as(u32, 0x80000001)), -0.0); - try test__powitf2(-math.inf_f128, @bitCast(i32, @as(u32, 0x80000000)), 0); + try test__powitf2(inf_f128, -1, 0); + try test__powitf2(inf_f128, -2, 0); + try test__powitf2(inf_f128, -3, 0); + try test__powitf2(inf_f128, -4, 0); + try test__powitf2(inf_f128, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powitf2(inf_f128, @bitCast(i32, @as(u32, 0x80000001)), 0); + try test__powitf2(inf_f128, @bitCast(i32, @as(u32, 0x80000000)), 0); + + try test__powitf2(-inf_f128, -1, -0.0); + try test__powitf2(-inf_f128, -2, 0); + try test__powitf2(-inf_f128, -3, -0.0); + try test__powitf2(-inf_f128, -4, 0); + try test__powitf2(-inf_f128, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powitf2(-inf_f128, @bitCast(i32, @as(u32, 0x80000001)), -0.0); + try test__powitf2(-inf_f128, @bitCast(i32, @as(u32, 0x80000000)), 0); try test__powitf2(2, 10, 1024.0); try test__powitf2(-2, 10, 1024.0); @@ -452,17 +456,18 @@ test "powitf2" { } test "powixf2" { + const inf_f80 = 
math.inf(f80); try test__powixf2(0, 0, 1); try test__powixf2(1, 0, 1); try test__powixf2(1.5, 0, 1); try test__powixf2(2, 0, 1); - try test__powixf2(math.inf_f80, 0, 1); + try test__powixf2(inf_f80, 0, 1); try test__powixf2(-0.0, 0, 1); try test__powixf2(-1, 0, 1); try test__powixf2(-1.5, 0, 1); try test__powixf2(-2, 0, 1); - try test__powixf2(-math.inf_f80, 0, 1); + try test__powixf2(-inf_f80, 0, 1); try test__powixf2(0, 1, 0); try test__powixf2(0, 2, 0); @@ -485,35 +490,35 @@ test "powixf2" { try test__powixf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFE)), 1); try test__powixf2(1, @bitCast(i32, @as(u32, 0x7FFFFFFF)), 1); - try test__powixf2(math.inf_f80, 1, math.inf_f80); - try test__powixf2(math.inf_f80, 2, math.inf_f80); - try test__powixf2(math.inf_f80, 3, math.inf_f80); - try test__powixf2(math.inf_f80, 4, math.inf_f80); - try test__powixf2(math.inf_f80, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f80); - try test__powixf2(math.inf_f80, @bitCast(i32, @as(u32, 0x7FFFFFFF)), math.inf_f80); - - try test__powixf2(-math.inf_f80, 1, -math.inf_f80); - try test__powixf2(-math.inf_f80, 2, math.inf_f80); - try test__powixf2(-math.inf_f80, 3, -math.inf_f80); - try test__powixf2(-math.inf_f80, 4, math.inf_f80); - try test__powixf2(-math.inf_f80, @bitCast(i32, @as(u32, 0x7FFFFFFE)), math.inf_f80); - try test__powixf2(-math.inf_f80, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -math.inf_f80); - - try test__powixf2(0, -1, math.inf_f80); - try test__powixf2(0, -2, math.inf_f80); - try test__powixf2(0, -3, math.inf_f80); - try test__powixf2(0, -4, math.inf_f80); - try test__powixf2(0, @bitCast(i32, @as(u32, 0x80000002)), math.inf_f80); - try test__powixf2(0, @bitCast(i32, @as(u32, 0x80000001)), math.inf_f80); - try test__powixf2(0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f80); - - try test__powixf2(-0.0, -1, -math.inf_f80); - try test__powixf2(-0.0, -2, math.inf_f80); - try test__powixf2(-0.0, -3, -math.inf_f80); - try test__powixf2(-0.0, -4, math.inf_f80); - try test__powixf2(-0.0, 
@bitCast(i32, @as(u32, 0x80000002)), math.inf_f80); - try test__powixf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -math.inf_f80); - try test__powixf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), math.inf_f80); + try test__powixf2(inf_f80, 1, inf_f80); + try test__powixf2(inf_f80, 2, inf_f80); + try test__powixf2(inf_f80, 3, inf_f80); + try test__powixf2(inf_f80, 4, inf_f80); + try test__powixf2(inf_f80, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f80); + try test__powixf2(inf_f80, @bitCast(i32, @as(u32, 0x7FFFFFFF)), inf_f80); + + try test__powixf2(-inf_f80, 1, -inf_f80); + try test__powixf2(-inf_f80, 2, inf_f80); + try test__powixf2(-inf_f80, 3, -inf_f80); + try test__powixf2(-inf_f80, 4, inf_f80); + try test__powixf2(-inf_f80, @bitCast(i32, @as(u32, 0x7FFFFFFE)), inf_f80); + try test__powixf2(-inf_f80, @bitCast(i32, @as(u32, 0x7FFFFFFF)), -inf_f80); + + try test__powixf2(0, -1, inf_f80); + try test__powixf2(0, -2, inf_f80); + try test__powixf2(0, -3, inf_f80); + try test__powixf2(0, -4, inf_f80); + try test__powixf2(0, @bitCast(i32, @as(u32, 0x80000002)), inf_f80); + try test__powixf2(0, @bitCast(i32, @as(u32, 0x80000001)), inf_f80); + try test__powixf2(0, @bitCast(i32, @as(u32, 0x80000000)), inf_f80); + + try test__powixf2(-0.0, -1, -inf_f80); + try test__powixf2(-0.0, -2, inf_f80); + try test__powixf2(-0.0, -3, -inf_f80); + try test__powixf2(-0.0, -4, inf_f80); + try test__powixf2(-0.0, @bitCast(i32, @as(u32, 0x80000002)), inf_f80); + try test__powixf2(-0.0, @bitCast(i32, @as(u32, 0x80000001)), -inf_f80); + try test__powixf2(-0.0, @bitCast(i32, @as(u32, 0x80000000)), inf_f80); try test__powixf2(1, -1, 1); try test__powixf2(1, -2, 1); @@ -523,21 +528,21 @@ test "powixf2" { try test__powixf2(1, @bitCast(i32, @as(u32, 0x80000001)), 1); try test__powixf2(1, @bitCast(i32, @as(u32, 0x80000000)), 1); - try test__powixf2(math.inf_f80, -1, 0); - try test__powixf2(math.inf_f80, -2, 0); - try test__powixf2(math.inf_f80, -3, 0); - try test__powixf2(math.inf_f80, -4, 0); - try 
test__powixf2(math.inf_f80, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powixf2(math.inf_f80, @bitCast(i32, @as(u32, 0x80000001)), 0); - try test__powixf2(math.inf_f80, @bitCast(i32, @as(u32, 0x80000000)), 0); - - try test__powixf2(-math.inf_f80, -1, -0.0); - try test__powixf2(-math.inf_f80, -2, 0); - try test__powixf2(-math.inf_f80, -3, -0.0); - try test__powixf2(-math.inf_f80, -4, 0); - try test__powixf2(-math.inf_f80, @bitCast(i32, @as(u32, 0x80000002)), 0); - try test__powixf2(-math.inf_f80, @bitCast(i32, @as(u32, 0x80000001)), -0.0); - try test__powixf2(-math.inf_f80, @bitCast(i32, @as(u32, 0x80000000)), 0); + try test__powixf2(inf_f80, -1, 0); + try test__powixf2(inf_f80, -2, 0); + try test__powixf2(inf_f80, -3, 0); + try test__powixf2(inf_f80, -4, 0); + try test__powixf2(inf_f80, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powixf2(inf_f80, @bitCast(i32, @as(u32, 0x80000001)), 0); + try test__powixf2(inf_f80, @bitCast(i32, @as(u32, 0x80000000)), 0); + + try test__powixf2(-inf_f80, -1, -0.0); + try test__powixf2(-inf_f80, -2, 0); + try test__powixf2(-inf_f80, -3, -0.0); + try test__powixf2(-inf_f80, -4, 0); + try test__powixf2(-inf_f80, @bitCast(i32, @as(u32, 0x80000002)), 0); + try test__powixf2(-inf_f80, @bitCast(i32, @as(u32, 0x80000001)), -0.0); + try test__powixf2(-inf_f80, @bitCast(i32, @as(u32, 0x80000000)), 0); try test__powixf2(2, 10, 1024.0); try test__powixf2(-2, 10, 1024.0); diff --git a/lib/compiler_rt/udivmod.zig b/lib/compiler_rt/udivmod.zig index bae4365295..a83ece8ada 100644 --- a/lib/compiler_rt/udivmod.zig +++ b/lib/compiler_rt/udivmod.zig @@ -1,201 +1,149 @@ +const std = @import("std"); const builtin = @import("builtin"); const is_test = builtin.is_test; -const native_endian = builtin.cpu.arch.endian(); -const std = @import("std"); +const Log2Int = std.math.Log2Int; +const HalveInt = @import("common.zig").HalveInt; -const low = switch (native_endian) { +const lo = switch (builtin.cpu.arch.endian()) { .Big => 1, .Little => 
0, }; -const high = 1 - low; +const hi = 1 - lo; -pub fn udivmod(comptime DoubleInt: type, a: DoubleInt, b: DoubleInt, maybe_rem: ?*DoubleInt) DoubleInt { +// Let _u1 and _u0 be the high and low limbs of U respectively. +// Returns U / v_ and sets r = U % v_. +fn divwide_generic(comptime T: type, _u1: T, _u0: T, v_: T, r: *T) T { + const HalfT = HalveInt(T, false).HalfT; @setRuntimeSafety(is_test); + var v = v_; - const double_int_bits = @typeInfo(DoubleInt).Int.bits; - const single_int_bits = @divExact(double_int_bits, 2); - const SingleInt = std.meta.Int(.unsigned, single_int_bits); - const SignedDoubleInt = std.meta.Int(.signed, double_int_bits); - const Log2SingleInt = std.math.Log2Int(SingleInt); - - const n = @bitCast([2]SingleInt, a); - const d = @bitCast([2]SingleInt, b); - var q: [2]SingleInt = undefined; - var r: [2]SingleInt = undefined; - var sr: c_uint = undefined; - // special cases, X is unknown, K != 0 - if (n[high] == 0) { - if (d[high] == 0) { - // 0 X - // --- - // 0 X - if (maybe_rem) |rem| { - rem.* = n[low] % d[low]; - } - return n[low] / d[low]; - } - // 0 X - // --- - // K X + const b = @as(T, 1) << (@bitSizeOf(T) / 2); + var un64: T = undefined; + var un10: T = undefined; + + const s = @intCast(Log2Int(T), @clz(v)); + if (s > 0) { + // Normalize divisor + v <<= s; + un64 = (_u1 << s) | (_u0 >> @intCast(Log2Int(T), (@bitSizeOf(T) - @intCast(T, s)))); + un10 = _u0 << s; + } else { + // Avoid undefined behavior of (u0 >> @bitSizeOf(T)) + un64 = _u1; + un10 = _u0; + } + + // Break divisor up into two 32-bit digits + const vn1 = v >> (@bitSizeOf(T) / 2); + const vn0 = v & std.math.maxInt(HalfT); + + // Break right half of dividend into two digits + const un1 = un10 >> (@bitSizeOf(T) / 2); + const un0 = un10 & std.math.maxInt(HalfT); + + // Compute the first quotient digit, q1 + var q1 = un64 / vn1; + var rhat = un64 -% q1 *% vn1; + + // q1 has at most error 2. 
No more than 2 iterations + while (q1 >= b or q1 * vn0 > b * rhat + un1) { + q1 -= 1; + rhat += vn1; + if (rhat >= b) break; + } + + var un21 = un64 *% b +% un1 -% q1 *% v; + + // Compute the second quotient digit + var q0 = un21 / vn1; + rhat = un21 -% q0 *% vn1; + + // q0 has at most error 2. No more than 2 iterations. + while (q0 >= b or q0 * vn0 > b * rhat + un0) { + q0 -= 1; + rhat += vn1; + if (rhat >= b) break; + } + + r.* = (un21 *% b +% un0 -% q0 *% v) >> s; + return q1 *% b +% q0; +} + +fn divwide(comptime T: type, _u1: T, _u0: T, v: T, r: *T) T { + @setRuntimeSafety(is_test); + if (T == u64 and builtin.target.cpu.arch == .x86_64 and builtin.target.os.tag != .windows) { + var rem: T = undefined; + const quo = asm ( + \\divq %[v] + : [_] "={rax}" (-> T), + [_] "={rdx}" (rem), + : [v] "r" (v), + [_] "{rax}" (_u0), + [_] "{rdx}" (_u1), + ); + r.* = rem; + return quo; + } else { + return divwide_generic(T, _u1, _u0, v, r); + } +} + +// Returns a_ / b_ and sets maybe_rem = a_ % b. +pub fn udivmod(comptime T: type, a_: T, b_: T, maybe_rem: ?*T) T { + @setRuntimeSafety(is_test); + const HalfT = HalveInt(T, false).HalfT; + const SignedT = std.meta.Int(.signed, @bitSizeOf(T)); + + if (b_ > a_) { if (maybe_rem) |rem| { - rem.* = n[low]; + rem.* = a_; } return 0; } - // n[high] != 0 - if (d[low] == 0) { - if (d[high] == 0) { - // K X - // --- - // 0 0 - if (maybe_rem) |rem| { - rem.* = n[high] % d[low]; - } - return n[high] / d[low]; - } - // d[high] != 0 - if (n[low] == 0) { - // K 0 - // --- - // K 0 - if (maybe_rem) |rem| { - r[high] = n[high] % d[high]; - r[low] = 0; - rem.* = @bitCast(DoubleInt, r); - } - return n[high] / d[high]; - } - // K K - // --- - // K 0 - if ((d[high] & (d[high] - 1)) == 0) { - // d is a power of 2 - if (maybe_rem) |rem| { - r[low] = n[low]; - r[high] = n[high] & (d[high] - 1); - rem.* = @bitCast(DoubleInt, r); - } - return n[high] >> @intCast(Log2SingleInt, @ctz(d[high])); - } - // K K - // --- - // K 0 - sr = @bitCast(c_uint, 
@as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))); - // 0 <= sr <= single_int_bits - 2 or sr large - if (sr > single_int_bits - 2) { - if (maybe_rem) |rem| { - rem.* = a; - } - return 0; - } - sr += 1; - // 1 <= sr <= single_int_bits - 1 - // q.all = a << (double_int_bits - sr); - q[low] = 0; - q[high] = n[low] << @intCast(Log2SingleInt, single_int_bits - sr); - // r.all = a >> sr; - r[high] = n[high] >> @intCast(Log2SingleInt, sr); - r[low] = (n[high] << @intCast(Log2SingleInt, single_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr)); - } else { - // d[low] != 0 - if (d[high] == 0) { - // K X - // --- - // 0 K - if ((d[low] & (d[low] - 1)) == 0) { - // d is a power of 2 - if (maybe_rem) |rem| { - rem.* = n[low] & (d[low] - 1); - } - if (d[low] == 1) { - return a; - } - sr = @ctz(d[low]); - q[high] = n[high] >> @intCast(Log2SingleInt, sr); - q[low] = (n[high] << @intCast(Log2SingleInt, single_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr)); - return @bitCast(DoubleInt, q); - } - // K X - // --- - // 0 K - sr = 1 + single_int_bits + @as(c_uint, @clz(d[low])) - @as(c_uint, @clz(n[high])); - // 2 <= sr <= double_int_bits - 1 - // q.all = a << (double_int_bits - sr); - // r.all = a >> sr; - if (sr == single_int_bits) { - q[low] = 0; - q[high] = n[low]; - r[high] = 0; - r[low] = n[high]; - } else if (sr < single_int_bits) { - // 2 <= sr <= single_int_bits - 1 - q[low] = 0; - q[high] = n[low] << @intCast(Log2SingleInt, single_int_bits - sr); - r[high] = n[high] >> @intCast(Log2SingleInt, sr); - r[low] = (n[high] << @intCast(Log2SingleInt, single_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr)); - } else { - // single_int_bits + 1 <= sr <= double_int_bits - 1 - q[low] = n[low] << @intCast(Log2SingleInt, double_int_bits - sr); - q[high] = (n[high] << @intCast(Log2SingleInt, double_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr - single_int_bits)); - r[high] = 0; - r[low] = n[high] >> @intCast(Log2SingleInt, sr - single_int_bits); - 
} + + var a = @bitCast([2]HalfT, a_); + var b = @bitCast([2]HalfT, b_); + var q: [2]HalfT = undefined; + var r: [2]HalfT = undefined; + + // When the divisor fits in 64 bits, we can use an optimized path + if (b[hi] == 0) { + r[hi] = 0; + if (a[hi] < b[lo]) { + // The result fits in 64 bits + q[hi] = 0; + q[lo] = divwide(HalfT, a[hi], a[lo], b[lo], &r[lo]); } else { - // K X - // --- - // K K - sr = @bitCast(c_uint, @as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))); - // 0 <= sr <= single_int_bits - 1 or sr large - if (sr > single_int_bits - 1) { - if (maybe_rem) |rem| { - rem.* = a; - } - return 0; - } - sr += 1; - // 1 <= sr <= single_int_bits - // q.all = a << (double_int_bits - sr); - // r.all = a >> sr; - q[low] = 0; - if (sr == single_int_bits) { - q[high] = n[low]; - r[high] = 0; - r[low] = n[high]; - } else { - r[high] = n[high] >> @intCast(Log2SingleInt, sr); - r[low] = (n[high] << @intCast(Log2SingleInt, single_int_bits - sr)) | (n[low] >> @intCast(Log2SingleInt, sr)); - q[high] = n[low] << @intCast(Log2SingleInt, single_int_bits - sr); - } + // First, divide with the high part to get the remainder. After that a_hi < b_lo. 
+ q[hi] = a[hi] / b[lo]; + q[lo] = divwide(HalfT, a[hi] % b[lo], a[lo], b[lo], &r[lo]); } + if (maybe_rem) |rem| { + rem.* = @bitCast(T, r); + } + return @bitCast(T, q); } - // Not a special case - // q and r are initialized with: - // q.all = a << (double_int_bits - sr); - // r.all = a >> sr; - // 1 <= sr <= double_int_bits - 1 - var carry: u32 = 0; - var r_all: DoubleInt = undefined; - while (sr > 0) : (sr -= 1) { - // r:q = ((r:q) << 1) | carry - r[high] = (r[high] << 1) | (r[low] >> (single_int_bits - 1)); - r[low] = (r[low] << 1) | (q[high] >> (single_int_bits - 1)); - q[high] = (q[high] << 1) | (q[low] >> (single_int_bits - 1)); - q[low] = (q[low] << 1) | carry; - // carry = 0; - // if (r.all >= b) - // { - // r.all -= b; - // carry = 1; + + // 0 <= shift <= 63 + var shift: Log2Int(T) = @clz(b[hi]) - @clz(a[hi]); + var af = @bitCast(T, a); + var bf = @bitCast(T, b) << shift; + q = @bitCast([2]HalfT, @as(T, 0)); + + for (0..shift + 1) |_| { + q[lo] <<= 1; + // Branchless version of: + // if (af >= bf) { + // af -= bf; + // q[lo] |= 1; // } - r_all = @bitCast(DoubleInt, r); - const s: SignedDoubleInt = @bitCast(SignedDoubleInt, b -% r_all -% 1) >> (double_int_bits - 1); - carry = @intCast(u32, s & 1); - r_all -= b & @bitCast(DoubleInt, s); - r = @bitCast([2]SingleInt, r_all); + const s = @bitCast(SignedT, bf -% af -% 1) >> (@bitSizeOf(T) - 1); + q[lo] |= @intCast(HalfT, s & 1); + af -= bf & @bitCast(T, s); + bf >>= 1; } - const q_all = (@bitCast(DoubleInt, q) << 1) | carry; if (maybe_rem) |rem| { - rem.* = r_all; + rem.* = @bitCast(T, af); } - return q_all; + return @bitCast(T, q); } |
