aboutsummaryrefslogtreecommitdiff
path: root/lib/compiler_rt/count0bits.zig
blob: 874604eb2c60c240e1d4aee1721c7ecbdb62bf0b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
const std = @import("std");
const builtin = @import("builtin");
const common = @import("common.zig");

pub const panic = common.panic;

comptime {
    // All symbols below share the linkage and visibility configured in common.zig.
    const linkage = common.linkage;
    const visibility = common.visibility;

    // clz - count leading zeroes
    @export(&__clzsi2, .{ .name = "__clzsi2", .linkage = linkage, .visibility = visibility });
    @export(&__clzdi2, .{ .name = "__clzdi2", .linkage = linkage, .visibility = visibility });
    @export(&__clzti2, .{ .name = "__clzti2", .linkage = linkage, .visibility = visibility });

    // ctz - count trailing zeroes
    @export(&__ctzsi2, .{ .name = "__ctzsi2", .linkage = linkage, .visibility = visibility });
    @export(&__ctzdi2, .{ .name = "__ctzdi2", .linkage = linkage, .visibility = visibility });
    @export(&__ctzti2, .{ .name = "__ctzti2", .linkage = linkage, .visibility = visibility });

    // ffs - find first set
    @export(&__ffssi2, .{ .name = "__ffssi2", .linkage = linkage, .visibility = visibility });
    @export(&__ffsdi2, .{ .name = "__ffsdi2", .linkage = linkage, .visibility = visibility });
    @export(&__ffsti2, .{ .name = "__ffsti2", .linkage = linkage, .visibility = visibility });
}

// clz - count leading zeroes
// - clzXi2 for unoptimized little and big endian
// - __clzsi2_thumb1: assume a != 0
// - __clzsi2_arm32: assume a != 0

// ctz - count trailing zeroes
// - ctzXi2 for unoptimized little and big endian

// ffs - find first set
// * ffs = (a == 0) => 0, (a != 0) => ctz + 1
// * don't pay for ctz's `if (x == 0) return shift;` check: ffsXi2 carries its own copy of the loop
// - ffsXi2 for unoptimized little and big endian

/// Counts the leading zero bits of `a`, treating its bit pattern as unsigned.
///
/// `T` must be a 32-, 64- or 128-bit integer type. An all-zero input yields
/// the full bit width of `T`.
inline fn clzXi2(comptime T: type, a: T) i32 {
    const bit_count = @bitSizeOf(T);
    const U = switch (bit_count) {
        32 => u32,
        64 => u64,
        128 => u128,
        else => unreachable,
    };

    // `rest` holds the part of the value that still contains the highest set
    // bit; `zeros` starts at the full width and shrinks as that bit is located.
    var rest: U = @bitCast(a);
    var zeros: T = bit_count;

    // Binary search for the highest set bit, from Hacker's Delight.
    // The final pass runs with a step of 0 and changes nothing.
    comptime var step: u8 = bit_count;
    inline while (step > 0) {
        step >>= 1;
        const upper = rest >> step;
        if (upper != 0) {
            zeros -= step;
            rest = upper;
        }
    }

    // At this point `rest` is 0 (zero input) or 1 (the located top bit).
    return @intCast(zeros - @as(T, @bitCast(rest)));
}

/// Thumb-1 implementation of `__clzsi2`, written as a naked function whose
/// entire body is the assembly below.
///
/// The assembly reads its operand from `r0` and leaves the result in `r0`
/// before returning with `bx lr`; `r1`, `r2` and `r3` are used as scratch.
/// `r1` starts at 32 and is reduced by 16, 8 and 4 whenever the corresponding
/// right shift of the operand is non-zero (the operand is then replaced by the
/// shifted value), which leaves a value below 16 in `r0`. That value indexes
/// the 16-entry byte table at `.lut`, and the byte found there is subtracted
/// from `r1` to form the result.
fn __clzsi2_thumb1() callconv(.naked) void {
    @setRuntimeSafety(false);

    // Similar to the generic version with the last two rounds replaced by a LUT
    // NOTE(review): the in-assembly comment describes the table as "number of
    // bits set", but the entries are the bit length of the index (e.g. entry 2
    // is 2 and entry 4 is 3), not a population count.
    asm volatile (
        \\ movs r1, #32
        \\ lsrs r2, r0, #16
        \\ beq 1f
        \\ subs r1, #16
        \\ movs r0, r2
        \\ 1:
        \\ lsrs r2, r0, #8
        \\ beq 1f
        \\ subs r1, #8
        \\ movs r0, r2
        \\ 1:
        \\ lsrs r2, r0, #4
        \\ beq 1f
        \\ subs r1, #4
        \\ movs r0, r2
        \\ 1:
        \\ adr r3, .lut
        \\ ldrb r0, [r3, r0]
        \\ subs r0, r1, r0
        \\ bx lr
        \\ .p2align 2
        \\ // Number of bits set in the 0-15 range
        \\ .lut:
        \\ .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4
    );

    // The assembly returns through `bx lr`, so control never falls through to here.
    unreachable;
}

/// ARM-mode implementation of `__clzsi2`, written as a naked function whose
/// entire body is the assembly below.
///
/// Per the comments in the assembly, the operand is in `r0` and is assumed to
/// be non-zero, `r1` holds the leading-zero count plus one, and `r2` is
/// scratch. Four rounds with shifts of 16, 8, 4 and 2 either add the shift to
/// `r1` (when the shifted operand is zero) or replace `r0` with the shifted
/// operand. The final `sub` stores `r1 - (r0 >> 1)` in `r0` before `bx lr`.
fn __clzsi2_arm32() callconv(.naked) void {
    @setRuntimeSafety(false);

    asm volatile (
        \\ // Assumption: n != 0
        \\ // r0: n
        \\ // r1: count of leading zeros in n + 1
        \\ // r2: scratch register for shifted r0
        \\ mov r1, #1
        \\
        \\ // Basic block:
        \\ // if ((r0 >> SHIFT) == 0)
        \\ //   r1 += SHIFT;
        \\ // else
        \\ //   r0 >>= SHIFT;
        \\ // for descending powers of two as SHIFT.
        \\ lsrs r2, r0, #16
        \\ movne r0, r2
        \\ addeq r1, #16
        \\
        \\ lsrs r2, r0, #8
        \\ movne r0, r2
        \\ addeq r1, #8
        \\
        \\ lsrs r2, r0, #4
        \\ movne r0, r2
        \\ addeq r1, #4
        \\
        \\ lsrs r2, r0, #2
        \\ movne r0, r2
        \\ addeq r1, #2
        \\
        \\ // The basic block invariants at this point are (r0 >> 2) == 0 and
        \\ // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
        \\ //
        \\ // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)f
        \\ // ---+----------------+----------------+------------+--------------
        \\ // 1  | 1              | 0              | 0          | 1
        \\ // 2  | 0              | 1              | -1         | 0
        \\ // 3  | 0              | 1              | -1         | 0
        \\ //
        \\ // The r1's initial value of 1 compensates for the 1 here.
        \\ sub r0, r1, r0, lsr #1
        \\ bx lr
    );

    // The assembly returns through `bx lr`, so control never falls through to here.
    unreachable;
}

/// Portable 32-bit count-leading-zeroes, implemented with the generic
/// `clzXi2` binary search.
fn clzsi2_generic(a: i32) callconv(.c) i32 {
    return clzXi2(@TypeOf(a), a);
}

/// Implementation backing the `__clzsi2` symbol, chosen at compile time from
/// the target CPU architecture and its feature set.
pub const __clzsi2 = switch (builtin.cpu.arch) {
    .arm, .armeb, .thumb, .thumbeb => impl: {
        const is_thumb = builtin.cpu.arch.isThumb();
        const thumb_only = is_thumb or builtin.cpu.has(.arm, .noarm);
        const has_thumb2 = builtin.cpu.has(.arm, .thumb2);

        // Thumb code without Thumb2 gets the Thumb-1 assembly.
        if (thumb_only and !has_thumb2) break :impl __clzsi2_thumb1;

        // From here on we're either targeting Thumb2 or ARM.
        if (!is_thumb) break :impl __clzsi2_arm32;

        // Use the generic implementation otherwise.
        break :impl clzsi2_generic;
    },
    else => clzsi2_generic,
};

/// 64-bit count-leading-zeroes, implemented with the generic `clzXi2`.
pub fn __clzdi2(a: i64) callconv(.c) i32 {
    return clzXi2(@TypeOf(a), a);
}

/// 128-bit count-leading-zeroes, implemented with the generic `clzXi2`.
pub fn __clzti2(a: i128) callconv(.c) i32 {
    return clzXi2(@TypeOf(a), a);
}

/// Counts the trailing zero bits of `a`, treating its bit pattern as unsigned.
///
/// `T` must be a 32-, 64- or 128-bit integer type. An all-zero input yields
/// the full bit width of `T`.
inline fn ctzXi2(comptime T: type, a: T) i32 {
    const bit_count = @bitSizeOf(T);
    const U = switch (bit_count) {
        32 => u32,
        64 => u64,
        128 => u128,
        else => unreachable,
    };

    var rest: U = @bitCast(a);
    if (rest == 0) return bit_count;

    // Number of trailing zeroes as binary search, from Hacker's Delight.
    // `count` runs one ahead of the true answer; the final subtraction of the
    // lowest remaining bit corrects it.
    var count: T = 1;
    var low_mask: U = std.math.maxInt(U);
    comptime var step = bit_count;
    inline while (step > 1) {
        step >>= 1;
        low_mask >>= step;
        // Lower `step` bits all clear: skip past them.
        if ((rest & low_mask) == 0) {
            count += step;
            rest >>= step;
        }
    }
    return @intCast(count - @as(T, @bitCast(rest & 1)));
}

/// 32-bit count-trailing-zeroes, implemented with the generic `ctzXi2`.
pub fn __ctzsi2(a: i32) callconv(.c) i32 {
    return ctzXi2(@TypeOf(a), a);
}

/// 64-bit count-trailing-zeroes, implemented with the generic `ctzXi2`.
pub fn __ctzdi2(a: i64) callconv(.c) i32 {
    return ctzXi2(@TypeOf(a), a);
}

/// 128-bit count-trailing-zeroes, implemented with the generic `ctzXi2`.
pub fn __ctzti2(a: i128) callconv(.c) i32 {
    return ctzXi2(@TypeOf(a), a);
}

/// Finds the first (least significant) set bit of `a`.
///
/// Returns 0 when `a` is zero; otherwise returns the number of trailing zero
/// bits plus one, i.e. the 1-based index of the lowest set bit.
inline fn ffsXi2(comptime T: type, a: T) i32 {
    const U = std.meta.Int(.unsigned, @typeInfo(T).int.bits);

    var rest: U = @bitCast(a);
    // In contrast to ctz return 0
    if (rest == 0) return 0;

    // adapted from Number of trailing zeroes (see ctzXi2)
    var count: T = 1;
    var low_mask: U = std.math.maxInt(U);
    comptime var step = @bitSizeOf(T);
    inline while (step > 1) {
        step >>= 1;
        low_mask >>= step;
        // Lower `step` bits all clear: skip past them.
        if ((rest & low_mask) == 0) {
            count += step;
            rest >>= step;
        }
    }

    // return ctz + 1
    const trailing_zeros: i32 = @intCast(count - @as(T, @bitCast(rest & 1)));
    return trailing_zeros + 1;
}

/// 32-bit find-first-set, implemented with the generic `ffsXi2`.
pub fn __ffssi2(a: i32) callconv(.c) i32 {
    return ffsXi2(@TypeOf(a), a);
}

/// 64-bit find-first-set, implemented with the generic `ffsXi2`.
pub fn __ffsdi2(a: i64) callconv(.c) i32 {
    return ffsXi2(@TypeOf(a), a);
}

/// 128-bit find-first-set, implemented with the generic `ffsXi2`.
pub fn __ffsti2(a: i128) callconv(.c) i32 {
    return ffsXi2(@TypeOf(a), a);
}

// References the per-symbol test files (one per exported routine) from this
// file's test block; the imports are grouped by operation: clz, ctz, ffs.
test {
    // clz - count leading zeroes
    _ = @import("clzsi2_test.zig");
    _ = @import("clzdi2_test.zig");
    _ = @import("clzti2_test.zig");

    // ctz - count trailing zeroes
    _ = @import("ctzsi2_test.zig");
    _ = @import("ctzdi2_test.zig");
    _ = @import("ctzti2_test.zig");

    // ffs - find first set
    _ = @import("ffssi2_test.zig");
    _ = @import("ffsdi2_test.zig");
    _ = @import("ffsti2_test.zig");
}