| author | Andrew Kelley <andrew@ziglang.org> | 2020-01-22 17:42:02 -0500 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2020-01-22 17:42:44 -0500 |
| commit | 74872263cc83bb7cbd8d548e83b441f4bf2f2249 | |
| tree | 6c4e7bb2cbcb97d3b16259ecebcb05915921ecfe /lib/include/ppc_wrappers | |
| parent | 97b2ac598b91194c09a96c9ed86e4f9b266d019c | |
lib/headers: update to clang 10.x C headers
upstream revision: 3cce3790072249cbe51b96cea26bc78019c11fd0
Diffstat (limited to 'lib/include/ppc_wrappers')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/include/ppc_wrappers/emmintrin.h | 10 |
| -rw-r--r-- | lib/include/ppc_wrappers/mm_malloc.h | 6 |
| -rw-r--r-- | lib/include/ppc_wrappers/mmintrin.h | 7 |
| -rw-r--r-- | lib/include/ppc_wrappers/pmmintrin.h | 150 |
| -rw-r--r-- | lib/include/ppc_wrappers/smmintrin.h | 85 |
| -rw-r--r-- | lib/include/ppc_wrappers/tmmintrin.h | 495 |
| -rw-r--r-- | lib/include/ppc_wrappers/xmmintrin.h | 6 |
7 files changed, 757 insertions, 2 deletions
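The wrappers touched by this commit all share the same shape: on Linux/ppc64 targets they implement the Intel intrinsics in terms of VMX/VSX operations, otherwise they fall through to the next header on the include path via `#include_next`, and the SSE3/SSE4 headers refuse to compile unless `NO_WARN_X86_INTRINSICS` is defined. A minimal sketch of how consuming code picks these headers up follows; the compiler invocation and the sample values are illustrative assumptions, not part of the commit:

```c
/* Hypothetical consumer of the ppc_wrappers headers. Assumed build:
 *   clang -target powerpc64le-linux-gnu -DNO_WARN_X86_INTRINSICS demo.c
 * Without -DNO_WARN_X86_INTRINSICS the SSE3/SSE4 wrappers stop with an
 * #error; on non-ppc64 targets the wrapper falls through via #include_next. */
#include <pmmintrin.h>   /* pulls in emmintrin.h / xmmintrin.h as well */
#include <stdio.h>

int main(void) {
    __m128 x = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f); /* lanes: 1, 2, 3, 4 */
    __m128 y = _mm_set_ps(0.5f, 0.5f, 0.5f, 0.5f);
    __m128 r = _mm_addsub_ps(x, y);                /* {1-0.5, 2+0.5, 3-0.5, 4+0.5} */

    float out[4];
    _mm_storeu_ps(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
    return 0;
}
```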
diff --git a/lib/include/ppc_wrappers/emmintrin.h b/lib/include/ppc_wrappers/emmintrin.h index 617ce24acd..4dcb8485e2 100644 --- a/lib/include/ppc_wrappers/emmintrin.h +++ b/lib/include/ppc_wrappers/emmintrin.h @@ -35,6 +35,8 @@ #ifndef EMMINTRIN_H_ #define EMMINTRIN_H_ +#if defined(__linux__) && defined(__ppc64__) + #include <altivec.h> /* We need definitions from the SSE header files. */ @@ -1747,7 +1749,7 @@ _mm_sll_epi64 (__m128i __A, __m128i __B) lshift = vec_splat ((__v2du) __B, 0); shmask = vec_cmplt (lshift, shmax); result = vec_sl ((__v2du) __A, lshift); - result = vec_sel ((__v2du) shmask, result, shmask); + result = (__v2du)vec_sel ((__v2df) shmask, (__v2df)result, shmask); return (__m128i) result; } @@ -1841,7 +1843,7 @@ _mm_srl_epi64 (__m128i __A, __m128i __B) rshift = vec_splat ((__v2du) __B, 0); shmask = vec_cmplt (rshift, shmax); result = vec_sr ((__v2du) __A, rshift); - result = vec_sel ((__v2du) shmask, result, shmask); + result = (__v2du)vec_sel ((__v2df) shmask, (__v2df)result, shmask); return (__m128i) result; } @@ -2315,4 +2317,8 @@ _mm_castsi128_pd(__m128i __A) return (__m128d) __A; } +#else +#include_next <emmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + #endif /* EMMINTRIN_H_ */ diff --git a/lib/include/ppc_wrappers/mm_malloc.h b/lib/include/ppc_wrappers/mm_malloc.h index d91d7865c8..24b14c8e07 100644 --- a/lib/include/ppc_wrappers/mm_malloc.h +++ b/lib/include/ppc_wrappers/mm_malloc.h @@ -10,6 +10,8 @@ #ifndef _MM_MALLOC_H_INCLUDED #define _MM_MALLOC_H_INCLUDED +#if defined(__linux__) && defined(__ppc64__) + #include <stdlib.h> /* We can't depend on <stdlib.h> since the prototype of posix_memalign @@ -41,4 +43,8 @@ _mm_free (void * ptr) free (ptr); } +#else +#include_next <mm_malloc.h> +#endif + #endif /* _MM_MALLOC_H_INCLUDED */ diff --git a/lib/include/ppc_wrappers/mmintrin.h b/lib/include/ppc_wrappers/mmintrin.h index b949653adf..c55c44726f 100644 --- a/lib/include/ppc_wrappers/mmintrin.h +++ b/lib/include/ppc_wrappers/mmintrin.h @@ -35,6 +35,8 @@ #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED +#if defined(__linux__) && defined(__ppc64__) + #include <altivec.h> /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ @@ -1440,4 +1442,9 @@ extern __inline __m64 return (res.as_m64); #endif } + +#else +#include_next <mmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + #endif /* _MMINTRIN_H_INCLUDED */ diff --git a/lib/include/ppc_wrappers/pmmintrin.h b/lib/include/ppc_wrappers/pmmintrin.h new file mode 100644 index 0000000000..6d93383d54 --- /dev/null +++ b/lib/include/ppc_wrappers/pmmintrin.h @@ -0,0 +1,150 @@ +/*===---- pmmintrin.h - Implementation of SSE3 intrinsics on PowerPC -------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. 
+ Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. + + In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA + is a good match for most SIMD operations. However the Horizontal + add/sub requires the data pairs be permuted into a separate + registers with vertical even/odd alignment for the operation. + And the addsub operation requires the sign of only the even numbered + elements be flipped (xored with -0.0). + For larger blocks of code using these intrinsic implementations, + the compiler be should be able to schedule instructions to avoid + additional latency. + + In the specific case of the monitor and mwait instructions there are + no direct equivalent in the PowerISA at this time. So those + intrinsics are not implemented. */ +#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning." +#endif + +#ifndef PMMINTRIN_H_ +#define PMMINTRIN_H_ + +#if defined(__linux__) && defined(__ppc64__) + +/* We need definitions from the SSE2 and SSE header files*/ +#include <emmintrin.h> + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_addsub_ps (__m128 __X, __m128 __Y) +{ + const __v4sf even_n0 = {-0.0, 0.0, -0.0, 0.0}; + __v4sf even_neg_Y = vec_xor(__Y, even_n0); + return (__m128) vec_add (__X, even_neg_Y); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_addsub_pd (__m128d __X, __m128d __Y) +{ + const __v2df even_n0 = {-0.0, 0.0}; + __v2df even_neg_Y = vec_xor(__Y, even_n0); + return (__m128d) vec_add (__X, even_neg_Y); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_ps (__m128 __X, __m128 __Y) +{ + __vector unsigned char xform2 = { + 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, + 0x18, 0x19, 0x1A, 0x1B + }; + __vector unsigned char xform1 = { + 0x04, 0x05, 0x06, 0x07, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, + 0x1C, 0x1D, 0x1E, 0x1F + }; + return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2), + vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_ps (__m128 __X, __m128 __Y) +{ + __vector unsigned char xform2 = { + 0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, + 0x18, 0x19, 0x1A, 0x1B + }; + __vector unsigned char xform1 = { + 0x04, 0x05, 0x06, 0x07, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, + 0x1C, 0x1D, 0x1E, 0x1F + }; + return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2), + vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) vec_add (vec_mergeh ((__v2df) __X, (__v2df)__Y), + vec_mergel ((__v2df) __X, (__v2df)__Y)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) vec_sub (vec_mergeh ((__v2df) __X, (__v2df)__Y), + vec_mergel ((__v2df) __X, (__v2df)__Y)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movehdup_ps (__m128 __X) +{ + return (__m128)vec_mergeo ((__v4su)__X, (__v4su)__X); +} + +extern __inline __m128 __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_moveldup_ps (__m128 __X) +{ + return (__m128)vec_mergee ((__v4su)__X, (__v4su)__X); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loaddup_pd (double const *__P) +{ + return (__m128d) vec_splats (*__P); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movedup_pd (__m128d __X) +{ + return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0)); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_lddqu_si128 (__m128i const *__P) +{ + return (__m128i) (vec_vsx_ld(0, (signed int const *)__P)); +} + +/* POWER8 / POWER9 have no equivalent for _mm_monitor nor _mm_wait. */ + +#else +#include_next <pmmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + +#endif /* PMMINTRIN_H_ */ diff --git a/lib/include/ppc_wrappers/smmintrin.h b/lib/include/ppc_wrappers/smmintrin.h new file mode 100644 index 0000000000..56ef6ba76b --- /dev/null +++ b/lib/include/ppc_wrappers/smmintrin.h @@ -0,0 +1,85 @@ +/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. + + NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerp64/powerpc64le. + + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ +#error \ + "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." 
+#endif + +#ifndef SMMINTRIN_H_ +#define SMMINTRIN_H_ + +#if defined(__linux__) && defined(__ppc64__) + +#include <altivec.h> +#include <emmintrin.h> + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_extract_epi8(__m128i __X, const int __N) { + return (unsigned char)((__v16qi)__X)[__N & 15]; +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_extract_epi32(__m128i __X, const int __N) { + return ((__v4si)__X)[__N & 3]; +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_extract_epi64(__m128i __X, const int __N) { + return ((__v2di)__X)[__N & 1]; +} + +extern __inline int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_extract_ps(__m128 __X, const int __N) { + return ((__v4si)__X)[__N & 3]; +} + +extern __inline __m128i + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) { + __v16qi __charmask = vec_splats((signed char)__imm8); + __charmask = vec_gb(__charmask); + __v8hu __shortmask = (__v8hu)vec_unpackh(__charmask); +#ifdef __BIG_ENDIAN__ + __shortmask = vec_reve(__shortmask); +#endif + return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask); +} + +extern __inline __m128i + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) { + const __v16qu __seven = vec_splats((unsigned char)0x07); + __v16qu __lmask = vec_sra((__v16qu)__mask, __seven); + return (__m128i)vec_sel((__v16qu)__A, (__v16qu)__B, __lmask); +} + +#else +#include_next <smmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + +#endif /* _SMMINTRIN_H_ */ diff --git a/lib/include/ppc_wrappers/tmmintrin.h b/lib/include/ppc_wrappers/tmmintrin.h new file mode 100644 index 0000000000..b5a935d5e4 --- /dev/null +++ b/lib/include/ppc_wrappers/tmmintrin.h @@ -0,0 +1,495 @@ +/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ +#endif + +#ifndef TMMINTRIN_H_ +#define TMMINTRIN_H_ + +#if defined(__linux__) && defined(__ppc64__) + +#include <altivec.h> + +/* We need definitions from the SSE header files. 
*/ +#include <pmmintrin.h> + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_epi16 (__m128i __A) +{ + return (__m128i) vec_abs ((__v8hi) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_epi32 (__m128i __A) +{ + return (__m128i) vec_abs ((__v4si) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_epi8 (__m128i __A) +{ + return (__m128i) vec_abs ((__v16qi) __A); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_pi16 (__m64 __A) +{ + __v8hi __B = (__v8hi) (__v2du) { __A, __A }; + return (__m64) ((__v2du) vec_abs (__B))[0]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_pi32 (__m64 __A) +{ + __v4si __B = (__v4si) (__v2du) { __A, __A }; + return (__m64) ((__v2du) vec_abs (__B))[0]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_pi8 (__m64 __A) +{ + __v16qi __B = (__v16qi) (__v2du) { __A, __A }; + return (__m64) ((__v2du) vec_abs (__B))[0]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count) +{ + if (__builtin_constant_p (__count) && __count < 16) + { +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_reve ((__v16qu) __A); + __B = (__m128i) vec_reve ((__v16qu) __B); +#endif + __A = (__m128i) vec_sld ((__v16qu) __B, (__v16qu) __A, __count); +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_reve ((__v16qu) __A); +#endif + return __A; + } + + if (__count == 0) + return __B; + + if (__count >= 16) + { + if (__count >= 32) + { + const __v16qu zero = { 0 }; + return (__m128i) zero; + } + else + { + const __v16qu __shift = + vec_splats ((unsigned char) ((__count - 16) * 8)); +#ifdef __LITTLE_ENDIAN__ + return (__m128i) vec_sro ((__v16qu) __A, __shift); +#else + return (__m128i) vec_slo ((__v16qu) __A, __shift); +#endif + } + } + else + { + const __v16qu __shiftA = + vec_splats ((unsigned char) ((16 - __count) * 8)); + const __v16qu __shiftB = vec_splats ((unsigned char) (__count * 8)); +#ifdef __LITTLE_ENDIAN__ + __A = (__m128i) vec_slo ((__v16qu) __A, __shiftA); + __B = (__m128i) vec_sro ((__v16qu) __B, __shiftB); +#else + __A = (__m128i) vec_sro ((__v16qu) __A, __shiftA); + __B = (__m128i) vec_slo ((__v16qu) __B, __shiftB); +#endif + return (__m128i) vec_or ((__v16qu) __A, (__v16qu) __B); + } +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_alignr_pi8 (__m64 __A, __m64 __B, unsigned int __count) +{ + if (__count < 16) + { + __v2du __C = { __B, __A }; +#ifdef __LITTLE_ENDIAN__ + const __v4su __shift = { __count << 3, 0, 0, 0 }; + __C = (__v2du) vec_sro ((__v16qu) __C, (__v16qu) __shift); +#else + const __v4su __shift = { 0, 0, 0, __count << 3 }; + __C = (__v2du) vec_slo ((__v16qu) __C, (__v16qu) __shift); +#endif + return (__m64) __C[0]; + } + else + { + const __m64 __zero = { 0 }; + return __zero; + } +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_epi16 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P); + __v8hi __D = vec_perm 
((__v8hi) __A, (__v8hi) __B, __Q); + return (__m128i) vec_add (__C, __D); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_epi32 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 }; + const __v16qu __Q = + { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 }; + __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P); + __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q); + return (__m128i) vec_add (__C, __D); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_pi16 (__m64 __A, __m64 __B) +{ + __v8hi __C = (__v8hi) (__v2du) { __A, __B }; + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 }; + __v8hi __D = vec_perm (__C, __C, __Q); + __C = vec_perm (__C, __C, __P); + __C = vec_add (__C, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadd_pi32 (__m64 __A, __m64 __B) +{ + __v4si __C = (__v4si) (__v2du) { __A, __B }; + const __v16qu __P = + { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 }; + const __v16qu __Q = + { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 }; + __v4si __D = vec_perm (__C, __C, __Q); + __C = vec_perm (__C, __C, __P); + __C = vec_add (__C, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadds_epi16 (__m128i __A, __m128i __B) +{ + __v4si __C = { 0 }, __D = { 0 }; + __C = vec_sum4s ((__v8hi) __A, __C); + __D = vec_sum4s ((__v8hi) __B, __D); + __C = (__v4si) vec_packs (__C, __D); + return (__m128i) __C; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hadds_pi16 (__m64 __A, __m64 __B) +{ + const __v4si __zero = { 0 }; + __v8hi __C = (__v8hi) (__v2du) { __A, __B }; + __v4si __D = vec_sum4s (__C, __zero); + __C = vec_packs (__D, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_epi16 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P); + __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q); + return (__m128i) vec_sub (__C, __D); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_epi32 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 }; + const __v16qu __Q = + { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 }; + __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P); + __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q); + return (__m128i) vec_sub (__C, __D); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_pi16 (__m64 __A, __m64 __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 }; + __v8hi __C = (__v8hi) (__v2du) { __A, __B }; + __v8hi __D = vec_perm (__C, __C, __Q); + __C = vec_perm (__C, __C, __P); + __C = vec_sub 
(__C, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsub_pi32 (__m64 __A, __m64 __B) +{ + const __v16qu __P = + { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 }; + const __v16qu __Q = + { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 }; + __v4si __C = (__v4si) (__v2du) { __A, __B }; + __v4si __D = vec_perm (__C, __C, __Q); + __C = vec_perm (__C, __C, __P); + __C = vec_sub (__C, __D); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubs_epi16 (__m128i __A, __m128i __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P); + __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q); + return (__m128i) vec_subs (__C, __D); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_hsubs_pi16 (__m64 __A, __m64 __B) +{ + const __v16qu __P = + { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 }; + const __v16qu __Q = + { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 }; + __v8hi __C = (__v8hi) (__v2du) { __A, __B }; + __v8hi __D = vec_perm (__C, __C, __P); + __v8hi __E = vec_perm (__C, __C, __Q); + __C = vec_subs (__D, __E); + return (__m64) ((__v2du) __C)[1]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_epi8 (__m128i __A, __m128i __B) +{ + const __v16qi __zero = { 0 }; + __vector __bool char __select = vec_cmplt ((__v16qi) __B, __zero); + __v16qi __C = vec_perm ((__v16qi) __A, (__v16qi) __A, (__v16qu) __B); + return (__m128i) vec_sel (__C, __zero, __select); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_pi8 (__m64 __A, __m64 __B) +{ + const __v16qi __zero = { 0 }; + __v16qi __C = (__v16qi) (__v2du) { __A, __A }; + __v16qi __D = (__v16qi) (__v2du) { __B, __B }; + __vector __bool char __select = vec_cmplt ((__v16qi) __D, __zero); + __C = vec_perm ((__v16qi) __C, (__v16qi) __C, (__v16qu) __D); + __C = vec_sel (__C, __zero, __select); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_epi8 (__m128i __A, __m128i __B) +{ + const __v16qi __zero = { 0 }; + __v16qi __selectneg = (__v16qi) vec_cmplt ((__v16qi) __B, __zero); + __v16qi __selectpos = + (__v16qi) vec_neg ((__v16qi) vec_cmpgt ((__v16qi) __B, __zero)); + __v16qi __conv = vec_add (__selectneg, __selectpos); + return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_epi16 (__m128i __A, __m128i __B) +{ + const __v8hi __zero = { 0 }; + __v8hi __selectneg = (__v8hi) vec_cmplt ((__v8hi) __B, __zero); + __v8hi __selectpos = + (__v8hi) vec_neg ((__v8hi) vec_cmpgt ((__v8hi) __B, __zero)); + __v8hi __conv = vec_add (__selectneg, __selectpos); + return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_epi32 (__m128i __A, __m128i __B) +{ + const __v4si __zero = { 0 }; + __v4si __selectneg = (__v4si) vec_cmplt ((__v4si) __B, __zero); + __v4si __selectpos = + (__v4si) vec_neg ((__v4si) 
vec_cmpgt ((__v4si) __B, __zero)); + __v4si __conv = vec_add (__selectneg, __selectpos); + return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_pi8 (__m64 __A, __m64 __B) +{ + const __v16qi __zero = { 0 }; + __v16qi __C = (__v16qi) (__v2du) { __A, __A }; + __v16qi __D = (__v16qi) (__v2du) { __B, __B }; + __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_pi16 (__m64 __A, __m64 __B) +{ + const __v8hi __zero = { 0 }; + __v8hi __C = (__v8hi) (__v2du) { __A, __A }; + __v8hi __D = (__v8hi) (__v2du) { __B, __B }; + __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sign_pi32 (__m64 __A, __m64 __B) +{ + const __v4si __zero = { 0 }; + __v4si __C = (__v4si) (__v2du) { __A, __A }; + __v4si __D = (__v4si) (__v2du) { __B, __B }; + __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddubs_epi16 (__m128i __A, __m128i __B) +{ + __v8hi __unsigned = vec_splats ((signed short) 0x00ff); + __v8hi __C = vec_and (vec_unpackh ((__v16qi) __A), __unsigned); + __v8hi __D = vec_and (vec_unpackl ((__v16qi) __A), __unsigned); + __v8hi __E = vec_unpackh ((__v16qi) __B); + __v8hi __F = vec_unpackl ((__v16qi) __B); + __C = vec_mul (__C, __E); + __D = vec_mul (__D, __F); + const __v16qu __odds = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __evens = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __E = vec_perm (__C, __D, __odds); + __F = vec_perm (__C, __D, __evens); + return (__m128i) vec_adds (__E, __F); +} + +extern __inline __m64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maddubs_pi16 (__m64 __A, __m64 __B) +{ + __v8hi __C = (__v8hi) (__v2du) { __A, __A }; + __C = vec_unpackl ((__v16qi) __C); + const __v8hi __unsigned = vec_splats ((signed short) 0x00ff); + __C = vec_and (__C, __unsigned); + __v8hi __D = (__v8hi) (__v2du) { __B, __B }; + __D = vec_unpackl ((__v16qi) __D); + __D = vec_mul (__C, __D); + const __v16qu __odds = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; + const __v16qu __evens = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; + __C = vec_perm (__D, __D, __odds); + __D = vec_perm (__D, __D, __evens); + __C = vec_adds (__C, __D); + return (__m64) ((__v2du) (__C))[0]; +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhrs_epi16 (__m128i __A, __m128i __B) +{ + __v4si __C = vec_unpackh ((__v8hi) __A); + __v4si __D = vec_unpackh ((__v8hi) __B); + __C = vec_mul (__C, __D); + __D = vec_unpackl ((__v8hi) __A); + __v4si __E = vec_unpackl ((__v8hi) __B); + __D = vec_mul (__D, __E); + const __v4su __shift = vec_splats ((unsigned int) 14); + __C = vec_sr (__C, __shift); + __D = vec_sr (__D, __shift); + const __v4si __ones = vec_splats ((signed int) 1); + __C = vec_add (__C, __ones); + __C = vec_sr (__C, (__v4su) __ones); + __D = vec_add (__D, __ones); + __D = vec_sr (__D, (__v4su) __ones); + return (__m128i) vec_pack (__C, __D); +} + +extern __inline __m64 
+__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhrs_pi16 (__m64 __A, __m64 __B) +{ + __v4si __C = (__v4si) (__v2du) { __A, __A }; + __C = vec_unpackh ((__v8hi) __C); + __v4si __D = (__v4si) (__v2du) { __B, __B }; + __D = vec_unpackh ((__v8hi) __D); + __C = vec_mul (__C, __D); + const __v4su __shift = vec_splats ((unsigned int) 14); + __C = vec_sr (__C, __shift); + const __v4si __ones = vec_splats ((signed int) 1); + __C = vec_add (__C, __ones); + __C = vec_sr (__C, (__v4su) __ones); + __v8hi __E = vec_pack (__C, __D); + return (__m64) ((__v2du) (__E))[0]; +} + +#else +#include_next <tmmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + +#endif /* TMMINTRIN_H_ */ diff --git a/lib/include/ppc_wrappers/xmmintrin.h b/lib/include/ppc_wrappers/xmmintrin.h index 1b322b6651..0f429fa040 100644 --- a/lib/include/ppc_wrappers/xmmintrin.h +++ b/lib/include/ppc_wrappers/xmmintrin.h @@ -34,6 +34,8 @@ #ifndef _XMMINTRIN_H_INCLUDED #define _XMMINTRIN_H_INCLUDED +#if defined(__linux__) && defined(__ppc64__) + /* Define four value permute mask */ #define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) @@ -1835,4 +1837,8 @@ do { \ /* For backward source compatibility. */ //# include <emmintrin.h> +#else +#include_next <xmmintrin.h> +#endif /* defined(__linux__) && defined(__ppc64__) */ + #endif /* _XMMINTRIN_H_INCLUDED */ |
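The pmmintrin.h comment above notes that the horizontal add/sub operations need the data pairs permuted into even/odd alignment, and that addsub flips the sign of only the even-numbered elements. For clarity, here is the standard SSE3 behaviour those wrappers reproduce, written out as a plain-C reference; the helper names are made up for this sketch and are not part of the commit:

```c
/* Plain-C reference for the SSE3 semantics the wrappers implement. */
static void addsub_ps_ref(const float a[4], const float b[4], float r[4]) {
    r[0] = a[0] - b[0]; /* even-indexed lanes: subtract (sign-flipped add) */
    r[1] = a[1] + b[1]; /* odd-indexed lanes: add */
    r[2] = a[2] - b[2];
    r[3] = a[3] + b[3];
}

static void hadd_ps_ref(const float a[4], const float b[4], float r[4]) {
    /* Horizontal add sums adjacent pairs within each source, which is why
     * the VSX implementation first permutes even/odd elements apart
     * (the xform1/xform2 masks) before a single vertical vec_add. */
    r[0] = a[0] + a[1];
    r[1] = a[2] + a[3];
    r[2] = b[0] + b[1];
    r[3] = b[2] + b[3];
}
```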
