aboutsummaryrefslogtreecommitdiff
path: root/lib/compiler_rt/udivmodei4.zig
blob: 0923f3f222fcdc070334afc3fdad4a610ecb4f6f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
const std = @import("std");
const builtin = @import("builtin");
const common = @import("common.zig");
const shr = std.math.shr;
const shl = std.math.shl;

const max_limbs = std.math.divCeil(usize, 65535, 32) catch unreachable; // max supported type is u65535

comptime {
    @export(&__udivei4, .{ .name = "__udivei4", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__umodei4, .{ .name = "__umodei4", .linkage = common.linkage, .visibility = common.visibility });
}

const endian = builtin.cpu.arch.endian();

/// Get the value of a limb.
inline fn limb(x: []const u32, i: usize) u32 {
    return if (endian == .little) x[i] else x[x.len - 1 - i];
}

/// Change the value of a limb.
inline fn limb_set(x: []u32, i: usize, v: u32) void {
    if (endian == .little) {
        x[i] = v;
    } else {
        x[x.len - 1 - i] = v;
    }
}

/// Uses Knuth's Algorithm D, 4.3.1, p. 272.
pub fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32) !void {
    if (q) |q_| @memset(q_[0..], 0);
    if (r) |r_| @memset(r_[0..], 0);

    if (u.len == 0 or v.len == 0) return error.DivisionByZero;

    var m = u.len - 1;
    var n = v.len - 1;
    while (limb(u, m) == 0) : (m -= 1) {
        if (m == 0) return;
    }
    while (limb(v, n) == 0) : (n -= 1) {
        if (n == 0) return error.DivisionByZero;
    }

    if (n > m) {
        if (r) |r_| @memcpy(r_[0..u.len], u);
        return;
    }

    const s = @clz(limb(v, n));

    var vn: [max_limbs]u32 = undefined;
    var i = n;
    while (i > 0) : (i -= 1) {
        limb_set(&vn, i, shl(u32, limb(v, i), s) | shr(u32, limb(v, i - 1), 32 - s));
    }
    limb_set(&vn, 0, shl(u32, limb(v, 0), s));

    var un: [max_limbs + 1]u32 = undefined;
    limb_set(&un, m + 1, shr(u32, limb(u, m), 32 - s));
    i = m;
    while (i > 0) : (i -= 1) {
        limb_set(&un, i, shl(u32, limb(u, i), s) | shr(u32, limb(u, i - 1), 32 - s));
    }
    limb_set(&un, 0, shl(u32, limb(u, 0), s));

    var j = m - n;
    while (true) : (j -= 1) {
        const uu = (@as(u64, limb(&un, j + n + 1)) << 32) + limb(&un, j + n);
        var qhat = uu / limb(&vn, n);
        var rhat = uu % limb(&vn, n);

        while (true) {
            if (qhat >= (1 << 32) or (n > 0 and qhat * limb(&vn, n - 1) > (rhat << 32) + limb(&un, j + n - 1))) {
                qhat -= 1;
                rhat += limb(&vn, n);
                if (rhat < (1 << 32)) continue;
            }
            break;
        }
        var carry: i64 = 0;
        i = 0;
        while (i <= n) : (i += 1) {
            const p = qhat * limb(&vn, i);
            const t = limb(&un, i + j) - carry - @as(u32, @truncate(p));
            limb_set(&un, i + j, @as(u32, @truncate(@as(u64, @bitCast(t)))));
            carry = @as(i64, @intCast(p >> 32)) - @as(i64, @intCast(t >> 32));
        }
        const t = limb(&un, j + n + 1) -% carry;
        limb_set(&un, j + n + 1, @as(u32, @truncate(@as(u64, @bitCast(t)))));
        if (q) |q_| limb_set(q_, j, @as(u32, @truncate(qhat)));
        if (t < 0) {
            if (q) |q_| limb_set(q_, j, limb(q_, j) - 1);
            var carry2: u64 = 0;
            i = 0;
            while (i <= n) : (i += 1) {
                const t2 = @as(u64, limb(&un, i + j)) + @as(u64, limb(&vn, i)) + carry2;
                limb_set(&un, i + j, @as(u32, @truncate(t2)));
                carry2 = t2 >> 32;
            }
            limb_set(&un, j + n + 1, @as(u32, @truncate(limb(&un, j + n + 1) + carry2)));
        }
        if (j == 0) break;
    }
    if (r) |r_| {
        i = 0;
        while (i <= n) : (i += 1) {
            limb_set(r_, i, shr(u32, limb(&un, i), s) | shl(u32, limb(&un, i + 1), 32 - s));
        }
        limb_set(r_, n, shr(u32, limb(&un, n), s));
    }
}

pub fn __udivei4(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void {
    @setRuntimeSafety(common.test_safety);
    const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits));
    const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size]));
    const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size]));
    const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size]));
    @call(.always_inline, divmod, .{ q, null, u, v }) catch unreachable;
}

pub fn __umodei4(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void {
    @setRuntimeSafety(common.test_safety);
    const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits));
    const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size]));
    const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size]));
    const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size]));
    @call(.always_inline, divmod, .{ null, r, u, v }) catch unreachable;
}

test "__udivei4/__umodei4" {
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;

    const RndGen = std.Random.DefaultPrng;
    var rnd = RndGen.init(42);
    var i: usize = 10000;
    while (i > 0) : (i -= 1) {
        const u = rnd.random().int(u1000);
        const v = 1 + rnd.random().int(u1200);
        const q = u / v;
        const r = u % v;
        const z = q * v + r;
        try std.testing.expect(z == u);
    }
}