author     Andrew Kelley <andrew@ziglang.org>  2021-08-15 18:00:10 -0700
committer  Andrew Kelley <andrew@ziglang.org>  2021-08-15 18:00:10 -0700
commit     21606339af2712d94bb3cfdcc9050287c5a2134c (patch)
tree       d5df6035a82eb191bf6f2d92518c5250d31833d5 /lib/include/arm_neon.h
parent     78ff2a148a707f041ab5e5cfdbb5f854bc66270e (diff)
download   zig-21606339af2712d94bb3cfdcc9050287c5a2134c.tar.gz
           zig-21606339af2712d94bb3cfdcc9050287c5a2134c.zip
update C header files to clang 13 rc1
Diffstat (limited to 'lib/include/arm_neon.h')
-rw-r--r--  lib/include/arm_neon.h  1242
1 file changed, 1077 insertions(+), 165 deletions(-)
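
For orientation, a minimal usage sketch follows. It is not part of the diff below; it only shows how a consumer might call two of the intrinsics this header update introduces (the polynomial vadd_p8 and the SHA-3 helper vbcaxq_u8), guarded by the same feature macros the header itself tests. The function names xor_poly and bit_clear_xor are invented for illustration.

/* Illustrative sketch, not part of the commit: exercising two intrinsics
 * added by this update to arm_neon.h. */
#include <arm_neon.h>

/* Polynomial addition over GF(2) is a lane-wise XOR of the two vectors. */
poly8x8_t xor_poly(poly8x8_t a, poly8x8_t b) {
  return vadd_p8(a, b);
}

#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA3) && defined(__aarch64__)
/* vbcaxq_u8 maps to the BCAX (bit clear and XOR) instruction added for SHA-3. */
uint8x16_t bit_clear_xor(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vbcaxq_u8(a, b, c);
}
#endif
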
diff --git a/lib/include/arm_neon.h b/lib/include/arm_neon.h
index dbb65b8a49..2448870ce4 100644
--- a/lib/include/arm_neon.h
+++ b/lib/include/arm_neon.h
@@ -2433,6 +2433,96 @@ __ai int16x4_t vadd_s16(int16x4_t __p0, int16x4_t __p1) {
#endif
#ifdef __LITTLE_ENDIAN__
+__ai poly8x8_t vadd_p8(poly8x8_t __p0, poly8x8_t __p1) {
+ poly8x8_t __ret;
+ __ret = (poly8x8_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 4);
+ return __ret;
+}
+#else
+__ai poly8x8_t vadd_p8(poly8x8_t __p0, poly8x8_t __p1) {
+ poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x8_t __ret;
+ __ret = (poly8x8_t) __builtin_neon_vadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 4);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+__ai poly64x1_t vadd_p64(poly64x1_t __p0, poly64x1_t __p1) {
+ poly64x1_t __ret;
+ __ret = (poly64x1_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 6);
+ return __ret;
+}
+#ifdef __LITTLE_ENDIAN__
+__ai poly16x4_t vadd_p16(poly16x4_t __p0, poly16x4_t __p1) {
+ poly16x4_t __ret;
+ __ret = (poly16x4_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 5);
+ return __ret;
+}
+#else
+__ai poly16x4_t vadd_p16(poly16x4_t __p0, poly16x4_t __p1) {
+ poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ poly16x4_t __ret;
+ __ret = (poly16x4_t) __builtin_neon_vadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 5);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai poly8x16_t vaddq_p8(poly8x16_t __p0, poly8x16_t __p1) {
+ poly8x16_t __ret;
+ __ret = (poly8x16_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 36);
+ return __ret;
+}
+#else
+__ai poly8x16_t vaddq_p8(poly8x16_t __p0, poly8x16_t __p1) {
+ poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly8x16_t __ret;
+ __ret = (poly8x16_t) __builtin_neon_vaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 36);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai poly64x2_t vaddq_p64(poly64x2_t __p0, poly64x2_t __p1) {
+ poly64x2_t __ret;
+ __ret = (poly64x2_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 38);
+ return __ret;
+}
+#else
+__ai poly64x2_t vaddq_p64(poly64x2_t __p0, poly64x2_t __p1) {
+ poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ poly64x2_t __ret;
+ __ret = (poly64x2_t) __builtin_neon_vaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 38);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai poly16x8_t vaddq_p16(poly16x8_t __p0, poly16x8_t __p1) {
+ poly16x8_t __ret;
+ __ret = (poly16x8_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 37);
+ return __ret;
+}
+#else
+__ai poly16x8_t vaddq_p16(poly16x8_t __p0, poly16x8_t __p1) {
+ poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ poly16x8_t __ret;
+ __ret = (poly16x8_t) __builtin_neon_vaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 37);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
__ai uint16x4_t vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) {
uint16x4_t __ret;
__ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17);
@@ -4553,6 +4643,54 @@ __ai uint16x4_t vcle_s16(int16x4_t __p0, int16x4_t __p1) {
#endif
#ifdef __LITTLE_ENDIAN__
+__ai int8x16_t vclsq_u8(uint8x16_t __p0) {
+ int8x16_t __ret;
+ __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 32);
+ return __ret;
+}
+#else
+__ai int8x16_t vclsq_u8(uint8x16_t __p0) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __ret;
+ __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 32);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int32x4_t vclsq_u32(uint32x4_t __p0) {
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 34);
+ return __ret;
+}
+#else
+__ai int32x4_t vclsq_u32(uint32x4_t __p0) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 34);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x8_t vclsq_u16(uint16x8_t __p0) {
+ int16x8_t __ret;
+ __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 33);
+ return __ret;
+}
+#else
+__ai int16x8_t vclsq_u16(uint16x8_t __p0) {
+ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ int16x8_t __ret;
+ __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 33);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
__ai int8x16_t vclsq_s8(int8x16_t __p0) {
int8x16_t __ret;
__ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 32);
@@ -4601,6 +4739,54 @@ __ai int16x8_t vclsq_s16(int16x8_t __p0) {
#endif
#ifdef __LITTLE_ENDIAN__
+__ai int8x8_t vcls_u8(uint8x8_t __p0) {
+ int8x8_t __ret;
+ __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__p0, 0);
+ return __ret;
+}
+#else
+__ai int8x8_t vcls_u8(uint8x8_t __p0) {
+ uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x8_t __ret;
+ __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 0);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int32x2_t vcls_u32(uint32x2_t __p0) {
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__p0, 2);
+ return __ret;
+}
+#else
+__ai int32x2_t vcls_u32(uint32x2_t __p0) {
+ uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int32x2_t __ret;
+ __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 2);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x4_t vcls_u16(uint16x4_t __p0) {
+ int16x4_t __ret;
+ __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__p0, 1);
+ return __ret;
+}
+#else
+__ai int16x4_t vcls_u16(uint16x4_t __p0) {
+ uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int16x4_t __ret;
+ __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 1);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
__ai int8x8_t vcls_s8(int8x8_t __p0) {
int8x8_t __ret;
__ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__p0, 0);
@@ -34889,7 +35075,7 @@ __ai uint32x2_t vcvtp_u32_f32(float32x2_t __p0) {
#endif
#endif
-#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_AES)
#ifdef __LITTLE_ENDIAN__
__ai uint8x16_t vaesdq_u8(uint8x16_t __p0, uint8x16_t __p1) {
uint8x16_t __ret;
@@ -34956,168 +35142,6 @@ __ai uint8x16_t vaesmcq_u8(uint8x16_t __p0) {
}
#endif
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__p0, __p1, __p2);
- return __ret;
-}
-#else
-__ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__rev0, __p1, __rev2);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
-__ai uint32_t vsha1h_u32(uint32_t __p0) {
- uint32_t __ret;
- __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0);
- return __ret;
-}
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__p0, __p1, __p2);
- return __ret;
-}
-#else
-__ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__rev0, __p1, __rev2);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__p0, __p1, __p2);
- return __ret;
-}
-#else
-__ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__rev0, __p1, __rev2);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1su0q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
- return __ret;
-}
-#else
-__ai uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1su0q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1su1q_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
- return __ret;
-}
-#else
-__ai uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha1su1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha256hq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
- return __ret;
-}
-#else
-__ai uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha256hq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha256h2q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
- return __ret;
-}
-#else
-__ai uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha256h2q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha256su0q_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
- return __ret;
-}
-#else
-__ai uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha256su0q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-__ai uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha256su1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
- return __ret;
-}
-#else
-__ai uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
- uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
- uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
- uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
- uint32x4_t __ret;
- __ret = (uint32x4_t) __builtin_neon_vsha256su1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
- return __ret;
-}
-#endif
-
#endif
#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)
#ifdef __LITTLE_ENDIAN__
@@ -35684,6 +35708,759 @@ __ai float16x4_t vminnm_f16(float16x4_t __p0, float16x4_t __p1) {
#endif
#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA2)
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__p0, __p1, __p2);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__rev0, __p1, __rev2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+__ai uint32_t vsha1h_u32(uint32_t __p0) {
+ uint32_t __ret;
+ __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0);
+ return __ret;
+}
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__p0, __p1, __p2);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__rev0, __p1, __rev2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__p0, __p1, __p2);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__rev0, __p1, __rev2);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1su0q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1su0q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1su1q_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha1su1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256hq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256hq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256h2q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256h2q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256su0q_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256su0q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256su1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsha256su1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA3) && defined(__aarch64__)
+#ifdef __LITTLE_ENDIAN__
+__ai uint8x16_t vbcaxq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vbcaxq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48);
+ return __ret;
+}
+#else
+__ai uint8x16_t vbcaxq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_vbcaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vbcaxq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vbcaxq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vbcaxq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vbcaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint64x2_t vbcaxq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vbcaxq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51);
+ return __ret;
+}
+#else
+__ai uint64x2_t vbcaxq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vbcaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint16x8_t vbcaxq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
+ uint16x8_t __ret;
+ __ret = (uint16x8_t) __builtin_neon_vbcaxq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49);
+ return __ret;
+}
+#else
+__ai uint16x8_t vbcaxq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
+ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint16x8_t __ret;
+ __ret = (uint16x8_t) __builtin_neon_vbcaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 49);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int8x16_t vbcaxq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) {
+ int8x16_t __ret;
+ __ret = (int8x16_t) __builtin_neon_vbcaxq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32);
+ return __ret;
+}
+#else
+__ai int8x16_t vbcaxq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) {
+ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __ret;
+ __ret = (int8x16_t) __builtin_neon_vbcaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int32x4_t vbcaxq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vbcaxq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34);
+ return __ret;
+}
+#else
+__ai int32x4_t vbcaxq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
+ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_vbcaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int64x2_t vbcaxq_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
+ int64x2_t __ret;
+ __ret = (int64x2_t) __builtin_neon_vbcaxq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35);
+ return __ret;
+}
+#else
+__ai int64x2_t vbcaxq_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
+ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+ int64x2_t __ret;
+ __ret = (int64x2_t) __builtin_neon_vbcaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 35);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x8_t vbcaxq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
+ int16x8_t __ret;
+ __ret = (int16x8_t) __builtin_neon_vbcaxq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33);
+ return __ret;
+}
+#else
+__ai int16x8_t vbcaxq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
+ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
+ int16x8_t __ret;
+ __ret = (int16x8_t) __builtin_neon_vbcaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint8x16_t veor3q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_veor3q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48);
+ return __ret;
+}
+#else
+__ai uint8x16_t veor3q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) {
+ uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint8x16_t __ret;
+ __ret = (uint8x16_t) __builtin_neon_veor3q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t veor3q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_veor3q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t veor3q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_veor3q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint64x2_t veor3q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_veor3q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51);
+ return __ret;
+}
+#else
+__ai uint64x2_t veor3q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_veor3q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint16x8_t veor3q_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
+ uint16x8_t __ret;
+ __ret = (uint16x8_t) __builtin_neon_veor3q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49);
+ return __ret;
+}
+#else
+__ai uint16x8_t veor3q_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) {
+ uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
+ uint16x8_t __ret;
+ __ret = (uint16x8_t) __builtin_neon_veor3q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 49);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int8x16_t veor3q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) {
+ int8x16_t __ret;
+ __ret = (int8x16_t) __builtin_neon_veor3q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32);
+ return __ret;
+}
+#else
+__ai int8x16_t veor3q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) {
+ int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ int8x16_t __ret;
+ __ret = (int8x16_t) __builtin_neon_veor3q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32);
+ __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int32x4_t veor3q_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_veor3q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34);
+ return __ret;
+}
+#else
+__ai int32x4_t veor3q_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) {
+ int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ int32x4_t __ret;
+ __ret = (int32x4_t) __builtin_neon_veor3q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int64x2_t veor3q_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
+ int64x2_t __ret;
+ __ret = (int64x2_t) __builtin_neon_veor3q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35);
+ return __ret;
+}
+#else
+__ai int64x2_t veor3q_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) {
+ int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+ int64x2_t __ret;
+ __ret = (int64x2_t) __builtin_neon_veor3q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 35);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai int16x8_t veor3q_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
+ int16x8_t __ret;
+ __ret = (int16x8_t) __builtin_neon_veor3q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33);
+ return __ret;
+}
+#else
+__ai int16x8_t veor3q_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) {
+ int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0);
+ int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0);
+ int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0);
+ int16x8_t __ret;
+ __ret = (int16x8_t) __builtin_neon_veor3q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33);
+ __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint64x2_t vrax1q_u64(uint64x2_t __p0, uint64x2_t __p1) {
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vrax1q_v((int8x16_t)__p0, (int8x16_t)__p1, 51);
+ return __ret;
+}
+#else
+__ai uint64x2_t vrax1q_u64(uint64x2_t __p0, uint64x2_t __p1) {
+ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vrax1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vxarq_u64(__p0, __p1, __p2) __extension__ ({ \
+ uint64x2_t __s0 = __p0; \
+ uint64x2_t __s1 = __p1; \
+ uint64x2_t __ret; \
+ __ret = (uint64x2_t) __builtin_neon_vxarq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \
+ __ret; \
+})
+#else
+#define vxarq_u64(__p0, __p1, __p2) __extension__ ({ \
+ uint64x2_t __s0 = __p0; \
+ uint64x2_t __s1 = __p1; \
+ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \
+ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \
+ uint64x2_t __ret; \
+ __ret = (uint64x2_t) __builtin_neon_vxarq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \
+ __ret; \
+})
+#endif
+
+#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA512) && defined(__aarch64__)
+#ifdef __LITTLE_ENDIAN__
+__ai uint64x2_t vsha512hq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vsha512hq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51);
+ return __ret;
+}
+#else
+__ai uint64x2_t vsha512hq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vsha512hq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint64x2_t vsha512h2q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vsha512h2q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51);
+ return __ret;
+}
+#else
+__ai uint64x2_t vsha512h2q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vsha512h2q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint64x2_t vsha512su0q_u64(uint64x2_t __p0, uint64x2_t __p1) {
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vsha512su0q_v((int8x16_t)__p0, (int8x16_t)__p1, 51);
+ return __ret;
+}
+#else
+__ai uint64x2_t vsha512su0q_u64(uint64x2_t __p0, uint64x2_t __p1) {
+ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vsha512su0q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint64x2_t vsha512su1q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vsha512su1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51);
+ return __ret;
+}
+#else
+__ai uint64x2_t vsha512su1q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) {
+ uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0);
+ uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0);
+ uint64x2_t __ret;
+ __ret = (uint64x2_t) __builtin_neon_vsha512su1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_SM3) && defined(__aarch64__)
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsm3partw1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm3partw1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsm3partw1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm3partw1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsm3partw2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm3partw2q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsm3partw2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm3partw2q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsm3ss1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm3ss1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsm3ss1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm3ss1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsm3tt1aq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+ uint32x4_t __s0 = __p0; \
+ uint32x4_t __s1 = __p1; \
+ uint32x4_t __s2 = __p2; \
+ uint32x4_t __ret; \
+ __ret = (uint32x4_t) __builtin_neon_vsm3tt1aq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \
+ __ret; \
+})
+#else
+#define vsm3tt1aq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+ uint32x4_t __s0 = __p0; \
+ uint32x4_t __s1 = __p1; \
+ uint32x4_t __s2 = __p2; \
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+ uint32x4_t __ret; \
+ __ret = (uint32x4_t) __builtin_neon_vsm3tt1aq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+ __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsm3tt1bq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+ uint32x4_t __s0 = __p0; \
+ uint32x4_t __s1 = __p1; \
+ uint32x4_t __s2 = __p2; \
+ uint32x4_t __ret; \
+ __ret = (uint32x4_t) __builtin_neon_vsm3tt1bq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \
+ __ret; \
+})
+#else
+#define vsm3tt1bq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+ uint32x4_t __s0 = __p0; \
+ uint32x4_t __s1 = __p1; \
+ uint32x4_t __s2 = __p2; \
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+ uint32x4_t __ret; \
+ __ret = (uint32x4_t) __builtin_neon_vsm3tt1bq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+ __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsm3tt2aq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+ uint32x4_t __s0 = __p0; \
+ uint32x4_t __s1 = __p1; \
+ uint32x4_t __s2 = __p2; \
+ uint32x4_t __ret; \
+ __ret = (uint32x4_t) __builtin_neon_vsm3tt2aq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \
+ __ret; \
+})
+#else
+#define vsm3tt2aq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+ uint32x4_t __s0 = __p0; \
+ uint32x4_t __s1 = __p1; \
+ uint32x4_t __s2 = __p2; \
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+ uint32x4_t __ret; \
+ __ret = (uint32x4_t) __builtin_neon_vsm3tt2aq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+ __ret; \
+})
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define vsm3tt2bq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+ uint32x4_t __s0 = __p0; \
+ uint32x4_t __s1 = __p1; \
+ uint32x4_t __s2 = __p2; \
+ uint32x4_t __ret; \
+ __ret = (uint32x4_t) __builtin_neon_vsm3tt2bq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \
+ __ret; \
+})
+#else
+#define vsm3tt2bq_u32(__p0, __p1, __p2, __p3) __extension__ ({ \
+ uint32x4_t __s0 = __p0; \
+ uint32x4_t __s1 = __p1; \
+ uint32x4_t __s2 = __p2; \
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \
+ uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \
+ uint32x4_t __ret; \
+ __ret = (uint32x4_t) __builtin_neon_vsm3tt2bq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
+ __ret; \
+})
+#endif
+
+#endif
+#if __ARM_ARCH >= 8 && defined(__ARM_FEATURE_SM4) && defined(__aarch64__)
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsm4eq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm4eq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsm4eq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm4eq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai uint32x4_t vsm4ekeyq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm4ekeyq_v((int8x16_t)__p0, (int8x16_t)__p1, 50);
+ return __ret;
+}
+#else
+__ai uint32x4_t vsm4ekeyq_u32(uint32x4_t __p0, uint32x4_t __p1) {
+ uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0);
+ uint32x4_t __ret;
+ __ret = (uint32x4_t) __builtin_neon_vsm4ekeyq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#endif
#if __ARM_ARCH >= 8 && defined(__aarch64__)
#ifdef __LITTLE_ENDIAN__
__ai int64x2_t vcvtaq_s64_f64(float64x2_t __p0) {
@@ -37963,6 +38740,136 @@ __ai float64x1_t vrndx_f64(float64x1_t __p0) {
return __ret;
}
#endif
+#if __ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_FRINT)
+#ifdef __LITTLE_ENDIAN__
+__ai float32x4_t vrnd32xq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrnd32xq_v((int8x16_t)__p0, 41);
+ return __ret;
+}
+#else
+__ai float32x4_t vrnd32xq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrnd32xq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x2_t vrnd32x_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd32x_v((int8x8_t)__p0, 9);
+ return __ret;
+}
+#else
+__ai float32x2_t vrnd32x_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd32x_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x4_t vrnd32zq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrnd32zq_v((int8x16_t)__p0, 41);
+ return __ret;
+}
+#else
+__ai float32x4_t vrnd32zq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrnd32zq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x2_t vrnd32z_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd32z_v((int8x8_t)__p0, 9);
+ return __ret;
+}
+#else
+__ai float32x2_t vrnd32z_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd32z_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x4_t vrnd64xq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrnd64xq_v((int8x16_t)__p0, 41);
+ return __ret;
+}
+#else
+__ai float32x4_t vrnd64xq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrnd64xq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x2_t vrnd64x_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd64x_v((int8x8_t)__p0, 9);
+ return __ret;
+}
+#else
+__ai float32x2_t vrnd64x_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd64x_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x4_t vrnd64zq_f32(float32x4_t __p0) {
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrnd64zq_v((int8x16_t)__p0, 41);
+ return __ret;
+}
+#else
+__ai float32x4_t vrnd64zq_f32(float32x4_t __p0) {
+ float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0);
+ float32x4_t __ret;
+ __ret = (float32x4_t) __builtin_neon_vrnd64zq_v((int8x16_t)__rev0, 41);
+ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0);
+ return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai float32x2_t vrnd64z_f32(float32x2_t __p0) {
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd64z_v((int8x8_t)__p0, 9);
+ return __ret;
+}
+#else
+__ai float32x2_t vrnd64z_f32(float32x2_t __p0) {
+ float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0);
+ float32x2_t __ret;
+ __ret = (float32x2_t) __builtin_neon_vrnd64z_v((int8x8_t)__rev0, 9);
+ __ret = __builtin_shufflevector(__ret, __ret, 1, 0);
+ return __ret;
+}
+#endif
+
+#endif
#if __ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)
#ifdef __LITTLE_ENDIAN__
__ai float64x2_t vmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) {
@@ -42710,7 +43617,7 @@ __ai float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2)
#endif
#endif
-#if defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)
+#if defined(__ARM_FEATURE_FP16_FML) && defined(__aarch64__)
#ifdef __LITTLE_ENDIAN__
__ai float32x4_t vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) {
float32x4_t __ret;
@@ -46786,6 +47693,11 @@ __ai int64_t vaddd_s64(int64_t __p0, int64_t __p1) {
__ret = (int64_t) __builtin_neon_vaddd_s64(__p0, __p1);
return __ret;
}
+__ai poly128_t vaddq_p128(poly128_t __p0, poly128_t __p1) {
+ poly128_t __ret;
+ __ret = (poly128_t) __builtin_neon_vaddq_p128(__p0, __p1);
+ return __ret;
+}
#ifdef __LITTLE_ENDIAN__
__ai uint16x8_t vaddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) {
uint16x8_t __ret;
@@ -67227,7 +68139,7 @@ __ai float32x4_t vcvtq_low_f32_bf16(bfloat16x8_t __p0) {
#endif
#endif
-#if defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)
+#if defined(__ARM_FEATURE_FP16_FML) && defined(__aarch64__)
#ifdef __LITTLE_ENDIAN__
#define vfmlalq_lane_high_f16(__p0_869, __p1_869, __p2_869, __p3_869) __extension__ ({ \
float32x4_t __s0_869 = __p0_869; \