aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Jacob Young <jacobly0@users.noreply.github.com> 2023-03-03 01:18:23 -0500
committer Jacob Young <jacobly0@users.noreply.github.com> 2023-03-05 02:59:01 -0500
commit 93d696e84ef17a32d5c2f1520a295ebcda968e91 (patch)
tree 7494db8b659f454b31025c4a3660e98c06860d34 /lib
parent a8f4ac2b94e7945a5a1623547f258f5f32f12674 (diff)
download zig-93d696e84ef17a32d5c2f1520a295ebcda968e91.tar.gz
zig-93d696e84ef17a32d5c2f1520a295ebcda968e91.zip
CBE: implement some big integer and vector unary operations
Diffstat (limited to 'lib')
-rw-r--r-- lib/zig.h 422
1 files changed, 414 insertions, 8 deletions
diff --git a/lib/zig.h b/lib/zig.h
index c39cffee24..e5cb421c6f 100644
--- a/lib/zig.h
+++ b/lib/zig.h
@@ -1919,7 +1919,7 @@ static inline zig_i128 zig_bit_reverse_i128(zig_i128 val, uint8_t bits) {
/* ========================== Big Integer Support =========================== */
-static inline uint16_t zig_big_bytes(uint16_t bits) {
+static inline uint16_t zig_int_bytes(uint16_t bits) {
uint16_t bytes = (bits + CHAR_BIT - 1) / CHAR_BIT;
uint16_t alignment = 16;
while (alignment / 2 >= bytes) alignment /= 2;
@@ -1931,7 +1931,7 @@ static inline int32_t zig_cmp_big(const void *lhs, const void *rhs, bool is_sign
const uint8_t *rhs_bytes = rhs;
uint16_t byte_offset = 0;
bool do_signed = is_signed;
- uint16_t remaining_bytes = zig_big_bytes(bits);
+ uint16_t remaining_bytes = zig_int_bytes(bits);
#if zig_little_endian
byte_offset = remaining_bytes;
@@ -1965,7 +1965,7 @@ static inline int32_t zig_cmp_big(const void *lhs, const void *rhs, bool is_sign
remaining_bytes -= 128 / CHAR_BIT;
#if zig_big_endian
- byte_offset -= 128 / CHAR_BIT;
+ byte_offset += 128 / CHAR_BIT;
#endif
}
@@ -1994,7 +1994,7 @@ static inline int32_t zig_cmp_big(const void *lhs, const void *rhs, bool is_sign
remaining_bytes -= 64 / CHAR_BIT;
#if zig_big_endian
- byte_offset -= 64 / CHAR_BIT;
+ byte_offset += 64 / CHAR_BIT;
#endif
}
@@ -2023,7 +2023,7 @@ static inline int32_t zig_cmp_big(const void *lhs, const void *rhs, bool is_sign
remaining_bytes -= 32 / CHAR_BIT;
#if zig_big_endian
- byte_offset -= 32 / CHAR_BIT;
+ byte_offset += 32 / CHAR_BIT;
#endif
}
@@ -2052,7 +2052,7 @@ static inline int32_t zig_cmp_big(const void *lhs, const void *rhs, bool is_sign
remaining_bytes -= 16 / CHAR_BIT;
#if zig_big_endian
- byte_offset -= 16 / CHAR_BIT;
+ byte_offset += 16 / CHAR_BIT;
#endif
}
@@ -2081,13 +2081,368 @@ static inline int32_t zig_cmp_big(const void *lhs, const void *rhs, bool is_sign
remaining_bytes -= 8 / CHAR_BIT;
#if zig_big_endian
- byte_offset -= 8 / CHAR_BIT;
+ byte_offset += 8 / CHAR_BIT;
#endif
}
return 0;
}
+static inline uint16_t zig_clz_big(const void *val, bool is_signed, uint16_t bits) { /*
+ * Counts the leading zero bits of the `bits`-wide integer stored at `val`.
+ *
+ * The storage size comes from zig_int_bytes(bits). The value is read in
+ * limbs of 128, 64, 32, 16 and finally 8 bits, starting at the end of the
+ * storage on little-endian targets and at offset 0 on big-endian targets.
+ * Each limb is copied out with memcpy before being examined.
+ *
+ * `is_signed` is accepted but unused. Returns the accumulated count; the
+ * function returns early as soon as a limb reports fewer zeros than the
+ * number of bits it was asked to examine.
+ */
+ const uint8_t *val_bytes = val;
+ uint16_t byte_offset = 0;
+ uint16_t remaining_bytes = zig_int_bytes(bits);
+ uint16_t skip_bits = remaining_bytes * 8 - bits; /* storage bits beyond `bits`; excluded from the first limb's width */
+ uint16_t total_lz = 0;
+ uint16_t limb_lz;
+ (void)is_signed;
+ /* Little-endian: start one past the last byte and step backwards before each read. */
+#if zig_little_endian
+ byte_offset = remaining_bytes;
+#endif
+ /* 128-bit limbs. */
+ while (remaining_bytes >= 128 / CHAR_BIT) {
+#if zig_little_endian
+ byte_offset -= 128 / CHAR_BIT;
+#endif
+
+ {
+ zig_u128 val_limb;
+ /* memcpy instead of a pointer cast: val_bytes has no alignment guarantee visible here. */
+ memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+ limb_lz = zig_clz_u128(val_limb, 128 - skip_bits);
+ }
+
+ total_lz += limb_lz;
+ if (limb_lz < 128 - skip_bits) return total_lz;
+ skip_bits = 0; /* only the first limb visited is narrowed by skip_bits */
+ remaining_bytes -= 128 / CHAR_BIT;
+ /* Big-endian: step forwards after each read. */
+#if zig_big_endian
+ byte_offset += 128 / CHAR_BIT;
+#endif
+ }
+ /* 64-bit limbs; same pattern as above. */
+ while (remaining_bytes >= 64 / CHAR_BIT) {
+#if zig_little_endian
+ byte_offset -= 64 / CHAR_BIT;
+#endif
+
+ {
+ uint64_t val_limb;
+
+ memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+ limb_lz = zig_clz_u64(val_limb, 64 - skip_bits);
+ }
+
+ total_lz += limb_lz;
+ if (limb_lz < 64 - skip_bits) return total_lz;
+ skip_bits = 0;
+ remaining_bytes -= 64 / CHAR_BIT;
+
+#if zig_big_endian
+ byte_offset += 64 / CHAR_BIT;
+#endif
+ }
+ /* 32-bit limbs. */
+ while (remaining_bytes >= 32 / CHAR_BIT) {
+#if zig_little_endian
+ byte_offset -= 32 / CHAR_BIT;
+#endif
+
+ {
+ uint32_t val_limb;
+
+ memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+ limb_lz = zig_clz_u32(val_limb, 32 - skip_bits);
+ }
+
+ total_lz += limb_lz;
+ if (limb_lz < 32 - skip_bits) return total_lz;
+ skip_bits = 0;
+ remaining_bytes -= 32 / CHAR_BIT;
+
+#if zig_big_endian
+ byte_offset += 32 / CHAR_BIT;
+#endif
+ }
+ /* 16-bit limbs. */
+ while (remaining_bytes >= 16 / CHAR_BIT) {
+#if zig_little_endian
+ byte_offset -= 16 / CHAR_BIT;
+#endif
+
+ {
+ uint16_t val_limb;
+
+ memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+ limb_lz = zig_clz_u16(val_limb, 16 - skip_bits);
+ }
+
+ total_lz += limb_lz;
+ if (limb_lz < 16 - skip_bits) return total_lz;
+ skip_bits = 0;
+ remaining_bytes -= 16 / CHAR_BIT;
+
+#if zig_big_endian
+ byte_offset += 16 / CHAR_BIT;
+#endif
+ }
+ /* 8-bit limbs. */
+ while (remaining_bytes >= 8 / CHAR_BIT) {
+#if zig_little_endian
+ byte_offset -= 8 / CHAR_BIT;
+#endif
+
+ {
+ uint8_t val_limb;
+
+ memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+ limb_lz = zig_clz_u8(val_limb, 8 - skip_bits);
+ }
+
+ total_lz += limb_lz;
+ if (limb_lz < 8 - skip_bits) return total_lz;
+ skip_bits = 0;
+ remaining_bytes -= 8 / CHAR_BIT;
+
+#if zig_big_endian
+ byte_offset += 8 / CHAR_BIT;
+#endif
+ }
+ /* No limb ended the scan early: every examined bit was zero. */
+ return total_lz;
+}
+
+/*
+ * Counts the trailing zero bits of the `bits`-wide integer stored at `val`.
+ *
+ * The storage size comes from zig_int_bytes(bits). The value is read in
+ * limbs of 128, 64, 32, 16 and finally 8 bits, starting at offset 0 on
+ * little-endian targets and at the end of the storage on big-endian
+ * targets. Each limb is copied out with memcpy before being examined.
+ *
+ * `is_signed` is accepted but unused.
+ *
+ * Returns the number of trailing zero bits. If every limb is zero the
+ * result is `bits`: the storage may be wider than `bits`, so the raw
+ * per-limb total is clamped instead of reporting the padded width.
+ */
+static inline uint16_t zig_ctz_big(const void *val, bool is_signed, uint16_t bits) {
+    const uint8_t *val_bytes = val;
+    uint16_t byte_offset = 0;
+    uint16_t remaining_bytes = zig_int_bytes(bits);
+    uint16_t total_tz = 0;
+    uint16_t limb_tz;
+    (void)is_signed;
+
+#if zig_big_endian
+    /* Big-endian: start one past the last byte and step backwards before each read. */
+    byte_offset = remaining_bytes;
+#endif
+
+    /* 128-bit limbs. */
+    while (remaining_bytes >= 128 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 128 / CHAR_BIT;
+#endif
+
+        {
+            zig_u128 val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            limb_tz = zig_ctz_u128(val_limb, 128);
+        }
+
+        total_tz += limb_tz;
+        /* A limb that is not entirely zero ends the scan. */
+        if (limb_tz < 128) return total_tz;
+        remaining_bytes -= 128 / CHAR_BIT;
+
+#if zig_little_endian
+        /* Little-endian: step forwards after each read. */
+        byte_offset += 128 / CHAR_BIT;
+#endif
+    }
+
+    /* 64-bit limbs. */
+    while (remaining_bytes >= 64 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 64 / CHAR_BIT;
+#endif
+
+        {
+            uint64_t val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            limb_tz = zig_ctz_u64(val_limb, 64);
+        }
+
+        total_tz += limb_tz;
+        if (limb_tz < 64) return total_tz;
+        remaining_bytes -= 64 / CHAR_BIT;
+
+#if zig_little_endian
+        byte_offset += 64 / CHAR_BIT;
+#endif
+    }
+
+    /* 32-bit limbs. */
+    while (remaining_bytes >= 32 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 32 / CHAR_BIT;
+#endif
+
+        {
+            uint32_t val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            limb_tz = zig_ctz_u32(val_limb, 32);
+        }
+
+        total_tz += limb_tz;
+        if (limb_tz < 32) return total_tz;
+        remaining_bytes -= 32 / CHAR_BIT;
+
+#if zig_little_endian
+        byte_offset += 32 / CHAR_BIT;
+#endif
+    }
+
+    /* 16-bit limbs. */
+    while (remaining_bytes >= 16 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 16 / CHAR_BIT;
+#endif
+
+        {
+            uint16_t val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            limb_tz = zig_ctz_u16(val_limb, 16);
+        }
+
+        total_tz += limb_tz;
+        if (limb_tz < 16) return total_tz;
+        remaining_bytes -= 16 / CHAR_BIT;
+
+#if zig_little_endian
+        byte_offset += 16 / CHAR_BIT;
+#endif
+    }
+
+    /* 8-bit limbs. */
+    while (remaining_bytes >= 8 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 8 / CHAR_BIT;
+#endif
+
+        {
+            uint8_t val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            limb_tz = zig_ctz_u8(val_limb, 8);
+        }
+
+        total_tz += limb_tz;
+        if (limb_tz < 8) return total_tz;
+        remaining_bytes -= 8 / CHAR_BIT;
+
+#if zig_little_endian
+        byte_offset += 8 / CHAR_BIT;
+#endif
+    }
+
+    /*
+     * Only reached when every limb was zero. total_tz then equals the
+     * storage width in bits, which can exceed `bits`; a zero value has
+     * exactly `bits` trailing zeros.
+     */
+    return total_tz < bits ? total_tz : bits;
+}
+
+/*
+ * Counts the set bits of the integer stored at `val`.
+ *
+ * The storage size comes from zig_int_bytes(bits). The value is read in
+ * limbs of 128, 64, 32, 16 and finally 8 bits, starting at offset 0 on
+ * little-endian targets and at the end of the storage on big-endian
+ * targets. Each limb is copied out with memcpy and its population count
+ * is added to the running total.
+ *
+ * `is_signed` is accepted but unused.
+ *
+ * NOTE(review): every byte of the storage is counted, including any bits
+ * beyond `bits`; this presumably relies on those bits being zero -- confirm
+ * for signed values.
+ */
+static inline uint16_t zig_popcount_big(const void *val, bool is_signed, uint16_t bits) {
+    const uint8_t *val_bytes = val;
+    uint16_t byte_offset = 0;
+    uint16_t remaining_bytes = zig_int_bytes(bits);
+    uint16_t total_pc = 0;
+    (void)is_signed;
+
+#if zig_big_endian
+    /* Big-endian: start one past the last byte and step backwards before each read. */
+    byte_offset = remaining_bytes;
+#endif
+
+    /* 128-bit limbs. */
+    while (remaining_bytes >= 128 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 128 / CHAR_BIT;
+#endif
+
+        {
+            zig_u128 val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            total_pc += zig_popcount_u128(val_limb, 128);
+        }
+
+        remaining_bytes -= 128 / CHAR_BIT;
+
+#if zig_little_endian
+        /* Little-endian: step forwards after each read. */
+        byte_offset += 128 / CHAR_BIT;
+#endif
+    }
+
+    /* 64-bit limbs. */
+    while (remaining_bytes >= 64 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 64 / CHAR_BIT;
+#endif
+
+        {
+            uint64_t val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            total_pc += zig_popcount_u64(val_limb, 64);
+        }
+
+        remaining_bytes -= 64 / CHAR_BIT;
+
+#if zig_little_endian
+        byte_offset += 64 / CHAR_BIT;
+#endif
+    }
+
+    /* 32-bit limbs. */
+    while (remaining_bytes >= 32 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 32 / CHAR_BIT;
+#endif
+
+        {
+            uint32_t val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            total_pc += zig_popcount_u32(val_limb, 32);
+        }
+
+        remaining_bytes -= 32 / CHAR_BIT;
+
+#if zig_little_endian
+        byte_offset += 32 / CHAR_BIT;
+#endif
+    }
+
+    /* 16-bit limbs. */
+    while (remaining_bytes >= 16 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 16 / CHAR_BIT;
+#endif
+
+        {
+            uint16_t val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            /* Accumulate, as the wider limbs do; plain assignment would drop earlier counts. */
+            total_pc += zig_popcount_u16(val_limb, 16);
+        }
+
+        remaining_bytes -= 16 / CHAR_BIT;
+
+#if zig_little_endian
+        byte_offset += 16 / CHAR_BIT;
+#endif
+    }
+
+    /* 8-bit limbs. */
+    while (remaining_bytes >= 8 / CHAR_BIT) {
+#if zig_big_endian
+        byte_offset -= 8 / CHAR_BIT;
+#endif
+
+        {
+            uint8_t val_limb;
+
+            memcpy(&val_limb, &val_bytes[byte_offset], sizeof(val_limb));
+            /* Accumulate, as the wider limbs do; plain assignment would drop earlier counts. */
+            total_pc += zig_popcount_u8(val_limb, 8);
+        }
+
+        remaining_bytes -= 8 / CHAR_BIT;
+
+#if zig_little_endian
+        byte_offset += 8 / CHAR_BIT;
+#endif
+    }
+
+    return total_pc;
+}
+
/* ========================= Floating Point Support ========================= */
#if _MSC_VER
@@ -2742,7 +3097,7 @@ zig_msvc_atomics_128op(u128, max)
uint32_t index = 0; \
const uint8_t *lhs_ptr = lhs; \
const uint8_t *rhs_ptr = rhs; \
- uint16_t elem_bytes = zig_big_bytes(elem_bits); \
+ uint16_t elem_bytes = zig_int_bytes(elem_bits); \
\
while (index < len) { \
result[index] = zig_cmp_big(lhs_ptr, rhs_ptr, is_signed, elem_bits) operator 0; \
@@ -2758,6 +3113,57 @@ zig_cmp_vec(le, <=)
zig_cmp_vec(gt, > )
zig_cmp_vec(ge, >=)
+/*
+ * Applies zig_clz_big to each of the `len` elements packed at `val` and
+ * stores the counts in `result`.
+ *
+ * Consecutive elements are zig_int_bytes(elem_bits) bytes apart. Counts are
+ * written as uint8_t entries when elem_bits <= 128 and as uint16_t entries
+ * otherwise. `is_signed` is forwarded unchanged to zig_clz_big.
+ */
+static inline void zig_clz_vec(void *result, const void *val, uint32_t len, bool is_signed, uint16_t elem_bits) {
+    const uint8_t *elem = val;
+    const uint16_t stride = zig_int_bytes(elem_bits);
+    uint32_t i;
+
+    for (i = 0; i < len; i += 1, elem += stride) {
+        const uint16_t count = zig_clz_big(elem, is_signed, elem_bits);
+
+        if (elem_bits > 128) {
+            ((uint16_t *)result)[i] = count;
+        } else {
+            ((uint8_t *)result)[i] = (uint8_t)count;
+        }
+    }
+}
+
+/*
+ * Applies zig_ctz_big to each of the `len` elements packed at `val` and
+ * stores the counts in `result`.
+ *
+ * Consecutive elements are zig_int_bytes(elem_bits) bytes apart. Counts are
+ * written as uint8_t entries when elem_bits <= 128 and as uint16_t entries
+ * otherwise. `is_signed` is forwarded unchanged to zig_ctz_big.
+ */
+static inline void zig_ctz_vec(void *result, const void *val, uint32_t len, bool is_signed, uint16_t elem_bits) {
+    const uint8_t *elem = val;
+    const uint16_t stride = zig_int_bytes(elem_bits);
+    uint32_t i;
+
+    for (i = 0; i < len; i += 1, elem += stride) {
+        const uint16_t count = zig_ctz_big(elem, is_signed, elem_bits);
+
+        if (elem_bits > 128) {
+            ((uint16_t *)result)[i] = count;
+        } else {
+            ((uint8_t *)result)[i] = (uint8_t)count;
+        }
+    }
+}
+
+/*
+ * Applies zig_popcount_big to each of the `len` elements packed at `val`
+ * and stores the counts in `result`.
+ *
+ * Consecutive elements are zig_int_bytes(elem_bits) bytes apart. Counts are
+ * written as uint8_t entries when elem_bits <= 128 and as uint16_t entries
+ * otherwise. `is_signed` is forwarded unchanged to zig_popcount_big.
+ */
+static inline void zig_popcount_vec(void *result, const void *val, uint32_t len, bool is_signed, uint16_t elem_bits) {
+    const uint8_t *elem = val;
+    const uint16_t stride = zig_int_bytes(elem_bits);
+    uint32_t i;
+
+    for (i = 0; i < len; i += 1, elem += stride) {
+        const uint16_t count = zig_popcount_big(elem, is_signed, elem_bits);
+
+        if (elem_bits > 128) {
+            ((uint16_t *)result)[i] = count;
+        } else {
+            ((uint8_t *)result)[i] = (uint8_t)count;
+        }
+    }
+}
+
/* ======================== Special Case Intrinsics ========================= */
#if (_MSC_VER && _M_X64) || defined(__x86_64__)