// lib/std/special/compiler_rt/trunc_f80.zig

const std = @import("std");
const builtin = @import("builtin");
const native_arch = builtin.cpu.arch;

/// Type used by the half-precision routine in this file for its C ABI return
/// value: `f16` itself when targeting AArch64, otherwise a `u16` holding the
/// raw bits.
///
/// AArch64 is the only ABI (at the moment) to support f16 arguments without the
/// need for extending them to wider fp types.
pub const F16T = if (native_arch.isAARCH64()) f16 else u16;

/// Narrows an `f80` to half precision and returns it as `F16T`
/// (a bit-for-bit reinterpretation of the `f16` result).
pub fn __truncxfhf2(a: f80) callconv(.C) F16T {
    const half: f16 = trunc(f16, a);
    return @bitCast(F16T, half);
}

/// Narrows an `f80` to single precision by delegating to `trunc`.
pub fn __truncxfsf2(a: f80) callconv(.C) f32 {
    const single: f32 = trunc(f32, a);
    return single;
}

/// Narrows an `f80` to double precision by delegating to `trunc`.
pub fn __truncxfdf2(a: f80) callconv(.C) f64 {
    const double: f64 = trunc(f64, a);
    return double;
}

/// Narrows the 80-bit extended-precision value `a` to the binary float type
/// `dst_t`, rounding to nearest with ties to even.
///
/// The input is split into its 16-bit sign/exponent word and its 64-bit
/// significand (whose top bit is the explicit integer bit). Four cases are
/// handled: results that are normal in `dst_t`, NaN, overflow to infinity, and
/// underflow to a denormal or zero. The sign of `a` is copied to the result.
inline fn trunc(comptime dst_t: type, a: f80) dst_t {
    @setRuntimeSafety(builtin.is_test);

    const dst_rep_t = std.meta.Int(.unsigned, @typeInfo(dst_t).Float.bits);
    const src_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit
    const dst_sig_bits = std.math.floatMantissaBits(dst_t);

    const src_exp_bias = 16383;
    // Explicit integer bit of the 64-bit f80 significand.
    const src_int_bit = 1 << 63;

    const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1;
    const halfway = 1 << (src_sig_bits - dst_sig_bits - 1);

    const dst_bits = @typeInfo(dst_t).Float.bits;
    const dst_exp_bits = dst_bits - dst_sig_bits - 1;
    const dst_inf_exp = (1 << dst_exp_bits) - 1;
    const dst_exp_bias = dst_inf_exp >> 1;

    // Smallest / one-past-largest source exponents that map onto a normal
    // destination exponent.
    const underflow = src_exp_bias + 1 - dst_exp_bias;
    const overflow = src_exp_bias + dst_inf_exp - dst_exp_bias;

    const dst_qnan = 1 << (dst_sig_bits - 1);
    const dst_nan_mask = dst_qnan - 1;

    // Break a into a sign and representation of the absolute value
    var a_rep = std.math.break_f80(a);
    const sign = a_rep.exp & 0x8000;
    a_rep.exp &= 0x7FFF;
    // Drop the explicit integer bit; it is re-added only where it is needed.
    a_rep.fraction &= 0x7FFFFFFFFFFFFFFF;
    var abs_result: dst_rep_t = undefined;

    // Wrapping range check: true exactly when underflow <= exp < overflow.
    if (a_rep.exp -% underflow < a_rep.exp -% overflow) {
        // The exponent of a is within the range of normal numbers in the
        // destination format.  We can convert by simply right-shifting with
        // rounding and adjusting the exponent.
        abs_result = @as(dst_rep_t, a_rep.exp) << dst_sig_bits;
        abs_result |= @truncate(dst_rep_t, a_rep.fraction >> (src_sig_bits - dst_sig_bits));
        abs_result -%= @as(dst_rep_t, src_exp_bias - dst_exp_bias) << dst_sig_bits;

        const round_bits = a_rep.fraction & round_mask;
        if (round_bits > halfway) {
            // Round to nearest
            abs_result += 1;
        } else if (round_bits == halfway) {
            // Ties to even
            abs_result += abs_result & 1;
        }
    } else if (a_rep.exp == 0x7FFF and a_rep.fraction != 0) {
        // a is NaN.
        // Conjure the result by beginning with infinity, setting the qNaN
        // bit and inserting the (truncated) trailing NaN field.
        abs_result = @intCast(dst_rep_t, dst_inf_exp) << dst_sig_bits;
        abs_result |= dst_qnan;
        abs_result |= @intCast(dst_rep_t, (a_rep.fraction >> (src_sig_bits - dst_sig_bits)) & dst_nan_mask);
    } else if (a_rep.exp >= overflow) {
        // a overflows to infinity.
        abs_result = @intCast(dst_rep_t, dst_inf_exp) << dst_sig_bits;
    } else {
        // a underflows on conversion to the destination type or is an exact
        // zero.  The result may be a denormal or zero.  Extract the exponent
        // to get the shift amount for the denormalization.  The `+ 1`
        // accounts for the integer bit, which becomes an ordinary fraction
        // bit of the denormal result.  Here exp < underflow, so shift >= 1.
        const shift = src_exp_bias - dst_exp_bias - a_rep.exp + 1;

        // Right shift by the denormalization amount with sticky.
        if (shift > src_sig_bits) {
            abs_result = 0;
        } else {
            // Full 64-bit significand including the integer bit.
            const significand = a_rep.fraction | src_int_bit;
            // Sticky is set when any bit shifted out below is non-zero.
            const sticky = @boolToInt(significand << @intCast(u6, 64 - shift) != 0);
            const denormalized_significand = significand >> @intCast(u6, shift) | sticky;
            abs_result = @intCast(dst_rep_t, denormalized_significand >> (src_sig_bits - dst_sig_bits));
            const round_bits = denormalized_significand & round_mask;
            if (round_bits > halfway) {
                // Round to nearest
                abs_result += 1;
            } else if (round_bits == halfway) {
                // Ties to even
                abs_result += abs_result & 1;
            }
        }
    }

    // Move the sign from bit 15 of the exponent word to the top bit of dst_t.
    const result align(@alignOf(dst_t)) = abs_result | @as(dst_rep_t, sign) << dst_bits - 16;
    return @bitCast(dst_t, result);
}

/// Narrows an `f128` to the 80-bit extended-precision format, rounding to
/// nearest with ties to even.
///
/// Both formats use a 15-bit exponent with the same bias, so the exponent is
/// copied unchanged and only the significand is shortened from 112 to 63
/// fraction bits. The f80 format stores its integer bit explicitly (bit 63 of
/// the significand); it is set for every non-zero exponent and cleared for
/// zero and denormals. NaN inputs produce a quiet NaN carrying the top bits of
/// the source payload. The sign of `a` is copied to the result.
pub fn __trunctfxf2(a: f128) callconv(.C) f80 {
    const src_sig_bits = std.math.floatMantissaBits(f128);
    const dst_sig_bits = std.math.floatMantissaBits(f80) - 1; // -1 for the integer bit

    // Various constants whose values follow from the type parameters.
    // Any reasonable optimizer will fold and propagate all of these.
    const src_bits = @typeInfo(f128).Float.bits;
    const src_exp_bits = src_bits - src_sig_bits - 1;
    const src_inf_exp = 0x7FFF;

    const src_inf = src_inf_exp << src_sig_bits;
    const src_sign_mask = 1 << (src_sig_bits + src_exp_bits);
    const src_abs_mask = src_sign_mask - 1;
    const round_mask = (1 << (src_sig_bits - dst_sig_bits)) - 1;
    const halfway = 1 << (src_sig_bits - dst_sig_bits - 1);
    const src_qnan = 1 << (src_sig_bits - 1);
    const src_nan_mask = src_qnan - 1;

    // Explicit integer bit and quiet-NaN bit of the f80 significand.
    const integer_bit: u64 = 1 << 63;
    const dst_qnan: u64 = 1 << (dst_sig_bits - 1);

    // Break a into a sign and representation of the absolute value
    const a_rep = @bitCast(u128, a);
    const a_abs = a_rep & src_abs_mask;
    const sign: u16 = if (a_rep & src_sign_mask != 0) 0x8000 else 0;

    var res: std.math.F80 = undefined;

    if (a_abs > src_inf) {
        // a is NaN.
        // Conjure the result by beginning with infinity, setting the qNaN
        // bit and inserting the (truncated) trailing NaN field.  The payload
        // is shifted RIGHT so that its most significant bits are kept.
        res.exp = 0x7fff;
        res.fraction = integer_bit | dst_qnan;
        res.fraction |= @truncate(u64, (a_abs & src_nan_mask) >> (src_sig_bits - dst_sig_bits));
    } else {
        // The exponent of a is within the range of normal numbers in the
        // destination format.  We can convert by simply right-shifting with
        // rounding, adding the explicit integer bit, and copying the exponent.
        // (Bit 63 of the shifted value is the low exponent bit, so it must be
        // overwritten with the integer bit.)
        res.fraction = @truncate(u64, a_abs >> (src_sig_bits - dst_sig_bits)) | integer_bit;
        res.exp = @truncate(u16, a_abs >> src_sig_bits);

        const round_bits = a_abs & round_mask;
        // Round to nearest; on an exact tie, round to even.
        const increment: u64 = if (round_bits > halfway)
            1
        else if (round_bits == halfway)
            res.fraction & 1
        else
            0;

        if (@addWithOverflow(u64, res.fraction, increment, &res.fraction)) {
            // The fraction wrapped to zero: bump the exponent and restore the
            // integer bit that was carried out.
            res.exp += 1;
            res.fraction |= integer_bit;
        }

        // Zero and denormals have no integer bit.
        if (res.exp == 0) res.fraction &= ~integer_bit;
    }

    res.exp |= sign;
    return std.math.make_f80(res);
}