Diffstat (limited to 'lib/libc/include/aarch64-macos-gnu/simd')
 lib/libc/include/aarch64-macos-gnu/simd/common.h     | 4458 +
 lib/libc/include/aarch64-macos-gnu/simd/conversion.h | 1966 +
 lib/libc/include/aarch64-macos-gnu/simd/logic.h      | 1315 +
 lib/libc/include/aarch64-macos-gnu/simd/math.h       | 5380 +
 lib/libc/include/aarch64-macos-gnu/simd/packed.h     | 1031 +
 lib/libc/include/aarch64-macos-gnu/simd/quaternion.h | 1194 +
 6 files changed, 15344 insertions(+), 0 deletions(-)
diff --git a/lib/libc/include/aarch64-macos-gnu/simd/common.h b/lib/libc/include/aarch64-macos-gnu/simd/common.h new file mode 100644 index 0000000000..5408c535fd --- /dev/null +++ b/lib/libc/include/aarch64-macos-gnu/simd/common.h @@ -0,0 +1,4458 @@ +/*! @header + * The interfaces declared in this header provide "common" elementwise + * operations that are neither math nor logic functions. These are available + * only for floating-point vectors and scalars, except for min, max, abs, + * clamp, and the reduce operations, which also support integer vectors. + * + * simd_abs(x) Absolute value of x. Also available as fabs + * for floating-point vectors. If x is the + * smallest signed integer, x is returned. + * + * simd_max(x,y) Returns the maximum of x and y. Also available + * as fmax for floating-point vectors. + * + * simd_min(x,y) Returns the minimum of x and y. Also available + * as fmin for floating-point vectors. + * + * simd_clamp(x,min,max) x clamped to the range [min, max]. + * + * simd_sign(x) -1 if x is less than zero, 0 if x is zero or + * NaN, and +1 if x is greater than zero. + * + * simd_mix(x,y,t) If t is not in the range [0,1], the result is + * undefined. Otherwise the result is x+(y-x)*t, + * which linearly interpolates between x and y. + * + * simd_recip(x) An approximation to 1/x. If x is very near the + * limits of representable values, or is infinity + * or NaN, the result is undefined. There are + * two variants of this function: + * + * simd_precise_recip(x) + * + * and + * + * simd_fast_recip(x). + * + * The "precise" variant is accurate to a few ULPs, + * whereas the "fast" variant may have as little + * as 11 bits of accuracy in float and about 22 + * bits in double. + * + * The function simd_recip(x) resolves to + * simd_precise_recip(x) ordinarily, but to + * simd_fast_recip(x) when used in a translation + * unit compiled with -ffast-math (when + * -ffast-math is in effect, you may still use the + * precise version of this function by calling it + * explicitly by name). + * + * simd_rsqrt(x) An approximation to 1/sqrt(x). If x is + * infinity or NaN, the result is undefined. + * There are two variants of this function: + * + * simd_precise_rsqrt(x) + * + * and + * + * simd_fast_rsqrt(x). + * + * The "precise" variant is accurate to a few ULPs, + * whereas the "fast" variant may have as little + * as 11 bits of accuracy in float and about 22 + * bits in double. + * + * The function simd_rsqrt(x) resolves to + * simd_precise_rsqrt(x) ordinarily, but to + * simd_fast_rsqrt(x) when used in a translation + * unit compiled with -ffast-math (when + * -ffast-math is in effect, you may still use the + * precise version of this function by calling it + * explicitly by name). + * + * simd_fract(x) The "fractional part" of x, which lies strictly + * in the range [0, 0x1.fffffep-1]. + * + * simd_step(edge,x) 0 if x < edge, and 1 otherwise. + * + * simd_smoothstep(edge0,edge1,x) 0 if x <= edge0, 1 if x >= edge1, and + * a Hermite interpolation between 0 and 1 if + * edge0 < x < edge1. + * + * simd_reduce_add(x) Sum of the elements of x. + * + * simd_reduce_min(x) Minimum of the elements of x. + * + * simd_reduce_max(x) Maximum of the elements of x. + * + * simd_equal(x,y) True if and only if every lane of x is equal + * to the corresponding lane of y. 
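+ *
+ * As a quick illustration, here is a small usage sketch (not part of
+ * this header; the lane values in the comments are what the
+ * definitions above imply):
+ *
+ * @code
+ *     simd_float4 x = { -2.0f, -0.5f, 0.5f, 2.0f };
+ *     simd_float4 s = simd_sign(x);               // { -1, -1, 1, 1 }
+ *     simd_float4 c = simd_clamp(x, -1.0f, 1.0f); // { -1, -0.5, 0.5, 1 }
+ *     simd_float4 t = { 0.5f, 0.5f, 0.5f, 0.5f };
+ *     simd_float4 m = simd_mix(c, s, t);          // { -1, -0.75, 0.75, 1 }
+ *     float sum = simd_reduce_add(m);             // 0.0f
+ * @endcode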
+ * + * The following common functions are available in the simd:: namespace: + * + * C++ Function Equivalent C Function + * -------------------------------------------------------------------- + * simd::abs(x) simd_abs(x) + * simd::max(x,y) simd_max(x,y) + * simd::min(x,y) simd_min(x,y) + * simd::clamp(x,min,max) simd_clamp(x,min,max) + * simd::sign(x) simd_sign(x) + * simd::mix(x,y,t) simd_mix(x,y,t) + * simd::recip(x) simd_recip(x) + * simd::rsqrt(x) simd_rsqrt(x) + * simd::fract(x) simd_fract(x) + * simd::step(edge,x) simd_step(edge,x) + * simd::smoothstep(e0,e1,x) simd_smoothstep(e0,e1,x) + * simd::reduce_add(x) simd_reduce_add(x) + * simd::reduce_max(x) simd_reduce_max(x) + * simd::reduce_min(x) simd_reduce_min(x) + * simd::equal(x,y) simd_equal(x,y) + * + * simd::precise::recip(x) simd_precise_recip(x) + * simd::precise::rsqrt(x) simd_precise_rsqrt(x) + * + * simd::fast::recip(x) simd_fast_recip(x) + * simd::fast::rsqrt(x) simd_fast_rsqrt(x) + * + * @copyright 2014-2017 Apple, Inc. All rights reserved. + * @unsorted */ + +#ifndef SIMD_COMMON_HEADER +#define SIMD_COMMON_HEADER + +#include <simd/base.h> +#if SIMD_COMPILER_HAS_REQUIRED_FEATURES +#include <simd/vector_make.h> +#include <simd/logic.h> +#include <simd/math.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x); +/*! 
@abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x); +/*! @abstract The elementwise absolute value of x. + * @discussion Deprecated. Use simd_abs(x) instead. */ +#define vector_abs simd_abs + +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y); +/*! 
@abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC float simd_max(float x, float y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y); +/*! 
@abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC double simd_max(double x, double y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y); +/*! @abstract The elementwise maximum of x and y. + * @discussion Deprecated. Use simd_max(x,y) instead. */ +#define vector_max simd_max + +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y); +/*! 
@abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC float simd_min(float x, float y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y); +/*! 
@abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC double simd_min(double x, double y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y); +/*! @abstract The elementwise minimum of x and y. + * @discussion Deprecated. Use simd_min(x,y) instead. */ +#define vector_min simd_min + + +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max); +/*! @abstract x clamped to the range [min, max]. 
+ * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max); +/*! 
@abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. 
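+ *
+ * For example (an illustrative sketch, not part of this header,
+ * clamping to the range of a signed byte):
+ * @code
+ *     simd_int4 v = { -300, -5, 40, 300 };
+ *     simd_int4 r = simd_clamp(v, -128, 127); // { -128, -5, 40, 127 }
+ * @endcode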
*/ +static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC float simd_clamp(float x, float min, float max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. 
*/ +static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC double simd_clamp(double x, double min, double max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Deprecated. Use simd_clamp(x,min,max) instead. */ +#define vector_clamp simd_clamp + +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC float simd_sign(float x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x); +/*! 
@abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC double simd_sign(double x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. + * @discussion Deprecated. Use simd_sign(x) instead. */ +#define vector_sign simd_sign + +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC float simd_mix(float x, float y, float t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC double simd_mix(double x, double y, double t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t); +/*! 
@abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 + * @discussion Deprecated. Use simd_mix(x, y, t) instead. */ +#define vector_mix simd_mix + +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC float simd_precise_recip(float x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC double simd_precise_recip(double x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x); +/*! @abstract A good approximation to 1/x. 
+ * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x); +/*! @abstract A good approximation to 1/x. + * @discussion Deprecated. Use simd_precise_recip(x) instead. */ +#define vector_precise_recip simd_precise_recip + +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC float simd_fast_recip(float x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC double simd_fast_recip(double x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. 
*/ +static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x); +/*! @abstract A fast approximation to 1/x. + * @discussion Deprecated. Use simd_fast_recip(x) instead. */ +#define vector_fast_recip simd_fast_recip + +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC float simd_recip(float x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC double simd_recip(double x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. 
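+ *
+ * For example (an illustrative sketch, not part of this header):
+ * @code
+ *     simd_float4 d = { 1.0f, 2.0f, 4.0f, 8.0f };
+ *     simd_float4 r = simd_recip(d);         // fast variant under -ffast-math
+ *     simd_float4 p = simd_precise_recip(d); // a few ULPs of accuracy either way
+ * @endcode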
*/ +static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x); +/*! @abstract An approximation to 1/x. + * @discussion Deprecated. Use simd_recip(x) instead. */ +#define vector_recip simd_recip + +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC float simd_precise_rsqrt(float x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC double simd_precise_rsqrt(double x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion Deprecated. Use simd_precise_rsqrt(x) instead. */ +#define vector_precise_rsqrt simd_precise_rsqrt + +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. 
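+ *
+ * A typical use is approximate normalization where full precision is
+ * not required (an illustrative sketch, not part of this header):
+ * @code
+ *     simd_float3 v = { 3.0f, 0.0f, 4.0f };
+ *     simd_float3 n = v * simd_fast_rsqrt(simd_reduce_add(v * v));
+ *     // n is approximately v/5, i.e. { 0.6, 0, 0.8 }
+ * @endcode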
*/ +static inline SIMD_CFUNC float simd_fast_rsqrt(float x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC double simd_fast_rsqrt(double x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion Deprecated. Use simd_fast_rsqrt(x) instead. */ +#define vector_fast_rsqrt simd_fast_rsqrt + +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_recip(x) if -ffast-math is + * specified, and to simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC float simd_rsqrt(float x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_recip(x) if -ffast-math is + * specified, and to simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_recip(x) if -ffast-math is + * specified, and to simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_recip(x) if -ffast-math is + * specified, and to simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x); +/*! @abstract An approximation to 1/sqrt(x). 
+ * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
+ * specified, and to simd_precise_rsqrt(x) otherwise. */
+static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x);
+/*! @abstract An approximation to 1/sqrt(x).
+ * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
+ * specified, and to simd_precise_rsqrt(x) otherwise. */
+static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x);
+/*! @abstract An approximation to 1/sqrt(x).
+ * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
+ * specified, and to simd_precise_rsqrt(x) otherwise. */
+static inline SIMD_CFUNC double simd_rsqrt(double x);
+/*! @abstract An approximation to 1/sqrt(x).
+ * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
+ * specified, and to simd_precise_rsqrt(x) otherwise. */
+static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x);
+/*! @abstract An approximation to 1/sqrt(x).
+ * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
+ * specified, and to simd_precise_rsqrt(x) otherwise. */
+static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x);
+/*! @abstract An approximation to 1/sqrt(x).
+ * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
+ * specified, and to simd_precise_rsqrt(x) otherwise. */
+static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x);
+/*! @abstract An approximation to 1/sqrt(x).
+ * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is
+ * specified, and to simd_precise_rsqrt(x) otherwise. */
+static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x);
+/*! @abstract An approximation to 1/sqrt(x).
+ * @discussion Deprecated. Use simd_rsqrt(x) instead. */
+#define vector_rsqrt simd_rsqrt
+
+/*! @abstract The "fractional part" of x, lying in the range [0, 1).
+ * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
+ * positive and finite, then the two values are exactly equal. */
+static inline SIMD_CFUNC float simd_fract(float x);
+/*! @abstract The "fractional part" of x, lying in the range [0, 1).
+ * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
+ * positive and finite, then the two values are exactly equal. */
+static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x);
+/*! @abstract The "fractional part" of x, lying in the range [0, 1).
+ * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
+ * positive and finite, then the two values are exactly equal. */
+static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x);
+/*! @abstract The "fractional part" of x, lying in the range [0, 1).
+ * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
+ * positive and finite, then the two values are exactly equal. */
+static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x);
+/*! @abstract The "fractional part" of x, lying in the range [0, 1).
+ * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
+ * positive and finite, then the two values are exactly equal. */
+static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x);
+/*! @abstract The "fractional part" of x, lying in the range [0, 1).
+ * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is
+ * positive and finite, then the two values are exactly equal. */
+static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x);
+/*! @abstract The "fractional part" of x, lying in the range [0, 1).
+ * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC double simd_fract(double x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion Deprecated. Use simd_fract(x) instead. */ +#define vector_fract simd_fract + +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC float simd_step(float edge, float x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC double simd_step(double edge, double x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. 
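+ *
+ * For example (illustrative), simd_step(0.0, v) is 0.0 in each lane
+ * where the corresponding lane of v is negative, and 1.0 otherwise.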
*/ +static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Deprecated. Use simd_step(edge, x) instead. */ +#define vector_step simd_step + +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x); +/*! 
@abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
+ * @discussion You can use a scalar value for edge0 and edge1 if you want
+ * to clamp all lanes at the same points. */
+static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x);
+/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
+ * @discussion You can use a scalar value for edge0 and edge1 if you want
+ * to clamp all lanes at the same points. */
+static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x);
+/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1
+ * @discussion Deprecated. Use simd_smoothstep(edge0, edge1, x) instead. */
+#define vector_smoothstep simd_smoothstep
+
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing.
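+ *
+ * For example (an illustrative sketch, assuming the simd_short
+ * conversion from <simd/conversion.h>), a simd_uchar16 v can be
+ * summed without wraparound by widening first:
+ *
+ *     short sum = simd_reduce_add(simd_short(v));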
*/
+static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x);
+/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types,
+ * you may need to convert to a wider type before reducing. */
+static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x);
+/*! @abstract Sum of elements in x.
+ * @discussion Deprecated. Use simd_reduce_add(x) instead. */
+#define vector_reduce_add simd_reduce_add
+
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x);
+/*!
@abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x); +/*! @abstract Minimum of elements in x. 
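+ * @discussion For example (illustrative), the reduction of a vector
+ * whose lanes are { 3, 1, 4, 1 } is 1.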
*/
+static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x);
+/*! @abstract Minimum of elements in x. */
+static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x);
+/*! @abstract Minimum of elements in x.
+ * @discussion Deprecated. Use simd_reduce_min(x) instead. */
+#define vector_reduce_min simd_reduce_min
+
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x);
+/*!
@abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x); +/*! @abstract Maximum of elements in x. 
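+ * @discussion For example (illustrative), simd_reduce_max(simd_abs(v))
+ * is the largest magnitude among the lanes of v.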
*/
+static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x);
+/*! @abstract Maximum of elements in x. */
+static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x);
+/*! @abstract Maximum of elements in x.
+ * @discussion Deprecated. Use simd_reduce_max(x) instead. */
+#define vector_reduce_max simd_reduce_max
+
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_char2 x, simd_char2 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_char3 x, simd_char3 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_char4 x, simd_char4 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_char8 x, simd_char8 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_char16 x, simd_char16 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_char32 x, simd_char32 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_char64 x, simd_char64 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar2 x, simd_uchar2 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar3 x, simd_uchar3 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar4 x, simd_uchar4 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar8 x, simd_uchar8 y) {
+  return simd_all(x == y);
+}
+/*!
@abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar16 x, simd_uchar16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar32 x, simd_uchar32 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar64 x, simd_uchar64 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short2 x, simd_short2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short3 x, simd_short3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short4 x, simd_short4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short8 x, simd_short8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short16 x, simd_short16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short32 x, simd_short32 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort2 x, simd_ushort2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort3 x, simd_ushort3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort4 x, simd_ushort4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort8 x, simd_ushort8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort16 x, simd_ushort16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort32 x, simd_ushort32 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int2 x, simd_int2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int3 x, simd_int3 y) { + return simd_all(x == y); +} +/*! 
@abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int4 x, simd_int4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int8 x, simd_int8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int16 x, simd_int16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint2 x, simd_uint2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint3 x, simd_uint3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint4 x, simd_uint4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint8 x, simd_uint8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint16 x, simd_uint16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_float2 x, simd_float2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_float3 x, simd_float3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_float4 x, simd_float4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_float8 x, simd_float8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_float16 x, simd_float16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_long2 x, simd_long2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_long3 x, simd_long3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_long4 x, simd_long4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_long8 x, simd_long8 y) { + return simd_all(x == y); +} +/*! 
@abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong2 x, simd_ulong2 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong3 x, simd_ulong3 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong4 x, simd_ulong4 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong8 x, simd_ulong8 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_double2 x, simd_double2 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_double3 x, simd_double3 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_double4 x, simd_double4 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_double8 x, simd_double8 y) {
+  return simd_all(x == y);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+
+namespace simd {
+  /*! @abstract The lanewise absolute value of x. */
+  template <typename typeN> static SIMD_CPPFUNC typeN abs(const typeN x) { return ::simd_abs(x); }
+  /*! @abstract The lanewise maximum of x and y. */
+  template <typename typeN> static SIMD_CPPFUNC typeN max(const typeN x, const typeN y) { return ::simd_max(x,y); }
+  /*! @abstract The lanewise minimum of x and y. */
+  template <typename typeN> static SIMD_CPPFUNC typeN min(const typeN x, const typeN y) { return ::simd_min(x,y); }
+  /*! @abstract x clamped to the interval [min, max]. */
+  template <typename typeN> static SIMD_CPPFUNC typeN clamp(const typeN x, const typeN min, const typeN max) { return ::simd_clamp(x,min,max); }
+  /*! @abstract -1 if x < 0, +1 if x > 0, and 0 otherwise. */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN sign(const fptypeN x) { return ::simd_sign(x); }
+  /*! @abstract Linearly interpolates between x and y, taking the value x when t=0 and y when t=1. */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN mix(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); }
+  /*! @abstract An approximation to 1/x. */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_recip(x); }
+  /*! @abstract An approximation to 1/sqrt(x). */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_rsqrt(x); }
+  /*! @abstract The "fractional part" of x, in the range [0,1). */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN fract(const fptypeN x) { return ::simd_fract(x); }
+  /*! @abstract 0 if x < edge, 1 otherwise. */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN step(const fptypeN edge, const fptypeN x) { return ::simd_step(edge,x); }
+  /*! @abstract Smoothly interpolates from 0 at edge0 to 1 at edge1.
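+   *
+   * Illustrative usage (assuming t is a simd_float4):
+   *
+   *     simd_float4 lo = { 0, 0, 0, 0 }, hi = { 1, 1, 1, 1 };
+   *     simd_float4 w = simd::smoothstep(lo, hi, t);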
*/ + template <typename fptypeN> static SIMD_CPPFUNC fptypeN smoothstep(const fptypeN edge0, const fptypeN edge1, const fptypeN x) { return ::simd_smoothstep(edge0,edge1,x); } + /*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. + * + * @discussion This isn't operator== because that's already defined by + * the compiler to return a lane mask. */ + template <typename fptypeN> static SIMD_CPPFUNC simd_bool equal(const fptypeN x, const fptypeN y) { return ::simd_equal(x, y); } +#if __cpp_decltype_auto + /* If you are targeting an earlier version of the C++ standard that lacks + decltype_auto support, you may use the C-style simd_reduce_* functions + instead. */ + /*! @abstract The sum of the elements in x. May overflow. */ + template <typename typeN> static SIMD_CPPFUNC auto reduce_add(typeN x) { return ::simd_reduce_add(x); } + /*! @abstract The least element in x. */ + template <typename typeN> static SIMD_CPPFUNC auto reduce_min(typeN x) { return ::simd_reduce_min(x); } + /*! @abstract The greatest element in x. */ + template <typename typeN> static SIMD_CPPFUNC auto reduce_max(typeN x) { return ::simd_reduce_max(x); } +#endif + namespace precise { + /*! @abstract An approximation to 1/x. */ + template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_precise_recip(x); } + /*! @abstract An approximation to 1/sqrt(x). */ + template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_precise_rsqrt(x); } + } + namespace fast { + /*! @abstract An approximation to 1/x. */ + template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_fast_recip(x); } + /*! @abstract An approximation to 1/sqrt(x). */ + template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_fast_rsqrt(x); } + } +} + +extern "C" { +#endif /* __cplusplus */ + +#pragma mark - Implementation + +static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x) { + return simd_make_char2(simd_abs(simd_make_char8_undef(x))); +} + +static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x) { + return simd_make_char3(simd_abs(simd_make_char8_undef(x))); +} + +static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x) { + return simd_make_char4(simd_abs(simd_make_char8_undef(x))); +} + +static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x) { +#if defined __arm__ || defined __arm64__ + return vabs_s8(x); +#else + return simd_make_char8(simd_abs(simd_make_char16_undef(x))); +#endif +} + +static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x) { +#if defined __arm__ || defined __arm64__ + return vabsq_s8(x); +#elif defined __SSE4_1__ + return (simd_char16) _mm_abs_epi8((__m128i)x); +#else + simd_char16 mask = x >> 7; return (x ^ mask) - mask; +#endif +} + +static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x) { +#if defined __AVX2__ + return _mm256_abs_epi8(x); +#else + return simd_make_char32(simd_abs(x.lo), simd_abs(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x) { +#if defined __AVX512BW__ + return _mm512_abs_epi8(x); +#else + return simd_make_char64(simd_abs(x.lo), simd_abs(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x) { + return simd_make_short2(simd_abs(simd_make_short4_undef(x))); +} + +static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x) { + return simd_make_short3(simd_abs(simd_make_short4_undef(x))); +} + +static inline SIMD_CFUNC simd_short4 
simd_abs(simd_short4 x) {
+#if defined __arm__ || defined __arm64__
+  return vabs_s16(x);
+#else
+  return simd_make_short4(simd_abs(simd_make_short8_undef(x)));
+#endif
+}
+
+static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x) {
+#if defined __arm__ || defined __arm64__
+  return vabsq_s16(x);
+#elif defined __SSE4_1__
+  return (simd_short8) _mm_abs_epi16((__m128i)x);
+#else
+  simd_short8 mask = x >> 15; return (x ^ mask) - mask;
+#endif
+}
+
+static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x) {
+#if defined __AVX2__
+  return _mm256_abs_epi16(x);
+#else
+  return simd_make_short16(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x) {
+#if defined __AVX512BW__
+  return _mm512_abs_epi16(x);
+#else
+  return simd_make_short32(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x) {
+#if defined __arm__ || defined __arm64__
+  return vabs_s32(x);
+#else
+  return simd_make_int2(simd_abs(simd_make_int4_undef(x)));
+#endif
+}
+
+static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x) {
+  return simd_make_int3(simd_abs(simd_make_int4_undef(x)));
+}
+
+static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x) {
+#if defined __arm__ || defined __arm64__
+  return vabsq_s32(x);
+#elif defined __SSE4_1__
+  return (simd_int4) _mm_abs_epi32((__m128i)x);
+#else
+  simd_int4 mask = x >> 31; return (x ^ mask) - mask;
+#endif
+}
+
+static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x) {
+#if defined __AVX2__
+  return _mm256_abs_epi32(x);
+#else
+  return simd_make_int8(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x) {
+#if defined __AVX512F__
+  return _mm512_abs_epi32(x);
+#else
+  return simd_make_int16(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x) {
+  return __tg_fabs(x);
+}
+
+static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x) {
+  return __tg_fabs(x);
+}
+
+static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x) {
+  return __tg_fabs(x);
+}
+
+static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x) {
+  return __tg_fabs(x);
+}
+
+static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x) {
+  return __tg_fabs(x);
+}
+
+static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x) {
+#if defined __arm64__
+  return vabsq_s64(x);
+#elif defined __AVX512VL__  /* _mm_abs_epi64 requires AVX-512VL */
+  return (simd_long2) _mm_abs_epi64((__m128i)x);
+#else
+  /* Branch-free two's-complement absolute value. */
+  simd_long2 mask = x >> 63; return (x ^ mask) - mask;
+#endif
+}
+
+static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x) {
+  return simd_make_long3(simd_abs(simd_make_long4_undef(x)));
+}
+
+static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x) {
+#if defined __AVX512VL__  /* _mm256_abs_epi64 requires AVX-512VL */
+  return _mm256_abs_epi64(x);
+#else
+  return simd_make_long4(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x) {
+#if defined __AVX512F__
+  return _mm512_abs_epi64(x);
+#else
+  return simd_make_long8(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x) {
+  return __tg_fabs(x);
+}
+
+static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x) {
+  return __tg_fabs(x);
+}
+
+static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x) {
+  return __tg_fabs(x);
+}
+
+static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x) {
+  return __tg_fabs(x);
+}
+
+static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y) {
+  return
simd_make_char2(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y) { + return simd_make_char3(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y) { + return simd_make_char4(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y) { +#if defined __arm__ || defined __arm64__ + return vmin_s8(x, y); +#else + return simd_make_char8(simd_min(simd_make_char16_undef(x), simd_make_char16_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y) { +#if defined __arm__ || defined __arm64__ + return vminq_s8(x, y); +#elif defined __SSE4_1__ + return (simd_char16) _mm_min_epi8((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y) { +#if defined __AVX2__ + return _mm256_min_epi8(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y) { +#if defined __AVX512BW__ + return _mm512_min_epi8(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y) { + return simd_make_uchar2(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y) { + return simd_make_uchar3(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y) { + return simd_make_uchar4(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y) { +#if defined __arm__ || defined __arm64__ + return vmin_u8(x, y); +#else + return simd_make_uchar8(simd_min(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y) { +#if defined __arm__ || defined __arm64__ + return vminq_u8(x, y); +#elif defined __SSE4_1__ + return (simd_uchar16) _mm_min_epu8((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y) { +#if defined __AVX2__ + return _mm256_min_epu8(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y) { +#if defined __AVX512BW__ + return _mm512_min_epu8(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y) { + return simd_make_short2(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y))); +} + +static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y) { + return simd_make_short3(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y))); +} + +static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y) { +#if defined __arm__ || defined __arm64__ + return vmin_s16(x, y); +#else + return simd_make_short4(simd_min(simd_make_short8_undef(x), simd_make_short8_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y) { +#if defined __arm__ || defined __arm64__ 
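+  // On ARM, NEON's vminq_s16 computes the lanewise signed minimum directly.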
+ return vminq_s16(x, y); +#elif defined __SSE4_1__ + return (simd_short8) _mm_min_epi16((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y) { +#if defined __AVX2__ + return _mm256_min_epi16(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y) { +#if defined __AVX512BW__ + return _mm512_min_epi16(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y) { + return simd_make_ushort2(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); +} + +static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y) { + return simd_make_ushort3(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); +} + +static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y) { +#if defined __arm__ || defined __arm64__ + return vmin_u16(x, y); +#else + return simd_make_ushort4(simd_min(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y) { +#if defined __arm__ || defined __arm64__ + return vminq_u16(x, y); +#elif defined __SSE4_1__ + return (simd_ushort8) _mm_min_epu16((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y) { +#if defined __AVX2__ + return _mm256_min_epu16(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y) { +#if defined __AVX512BW__ + return _mm512_min_epu16(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y) { +#if defined __arm__ || defined __arm64__ + return vmin_s32(x, y); +#else + return simd_make_int2(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y) { + return simd_make_int3(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y))); +} + +static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y) { +#if defined __arm__ || defined __arm64__ + return vminq_s32(x, y); +#elif defined __SSE4_1__ + return (simd_int4) _mm_min_epi32((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y) { +#if defined __AVX2__ + return _mm256_min_epi32(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y) { +#if defined __AVX512F__ + return _mm512_min_epi32(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y) { +#if defined __arm__ || defined __arm64__ + return vmin_u32(x, y); +#else + return simd_make_uint2(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y) { + return simd_make_uint3(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); +} + +static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y) { +#if defined __arm__ || defined __arm64__ + return vminq_u32(x, y); +#elif defined __SSE4_1__ 
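+  /* Editor's note: _mm_min_epu32 (PMINUD) is an SSE4.1 instruction; on
+   * older x86 targets the #else branch below synthesizes the unsigned
+   * minimum from a lanewise compare and simd_bitselect instead. */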
+ return (simd_uint4) _mm_min_epu32((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y) { +#if defined __AVX2__ + return _mm256_min_epu32(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y) { +#if defined __AVX512F__ + return _mm512_min_epu32(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC float simd_min(float x, float y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y) { +#if defined __AVX512VL__ + return _mm_min_epi64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y) { + return simd_make_long3(simd_min(simd_make_long4_undef(x), simd_make_long4_undef(y))); +} + +static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y) { +#if defined __AVX512VL__ + return _mm256_min_epi64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y) { +#if defined __AVX512F__ + return _mm512_min_epi64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y) { +#if defined __AVX512VL__ + return _mm_min_epu64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y) { + return simd_make_ulong3(simd_min(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y))); +} + +static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y) { +#if defined __AVX512VL__ + return _mm256_min_epu64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y) { +#if defined __AVX512F__ + return _mm512_min_epu64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC double simd_min(double x, double y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y) { + return simd_make_char2(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y) { + return simd_make_char3(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC 
simd_char4 simd_max(simd_char4 x, simd_char4 y) { + return simd_make_char4(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y) { +#if defined __arm__ || defined __arm64__ + return vmax_s8(x, y); +#else + return simd_make_char8(simd_max(simd_make_char16_undef(x), simd_make_char16_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_s8(x, y); +#elif defined __SSE4_1__ + return (simd_char16) _mm_max_epi8((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y) { +#if defined __AVX2__ + return _mm256_max_epi8(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y) { +#if defined __AVX512BW__ + return _mm512_max_epi8(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y) { + return simd_make_uchar2(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y) { + return simd_make_uchar3(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y) { + return simd_make_uchar4(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y) { +#if defined __arm__ || defined __arm64__ + return vmax_u8(x, y); +#else + return simd_make_uchar8(simd_max(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_u8(x, y); +#elif defined __SSE4_1__ + return (simd_uchar16) _mm_max_epu8((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y) { +#if defined __AVX2__ + return _mm256_max_epu8(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y) { +#if defined __AVX512BW__ + return _mm512_max_epu8(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y) { + return simd_make_short2(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y))); +} + +static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y) { + return simd_make_short3(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y))); +} + +static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y) { +#if defined __arm__ || defined __arm64__ + return vmax_s16(x, y); +#else + return simd_make_short4(simd_max(simd_make_short8_undef(x), simd_make_short8_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_s16(x, y); +#elif defined __SSE4_1__ + return (simd_short8) _mm_max_epi16((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y) { +#if defined __AVX2__ + 
return _mm256_max_epi16(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y) { +#if defined __AVX512BW__ + return _mm512_max_epi16(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y) { + return simd_make_ushort2(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); +} + +static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y) { + return simd_make_ushort3(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); +} + +static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y) { +#if defined __arm__ || defined __arm64__ + return vmax_u16(x, y); +#else + return simd_make_ushort4(simd_max(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_u16(x, y); +#elif defined __SSE4_1__ + return (simd_ushort8) _mm_max_epu16((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y) { +#if defined __AVX2__ + return _mm256_max_epu16(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y) { +#if defined __AVX512BW__ + return _mm512_max_epu16(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y) { +#if defined __arm__ || defined __arm64__ + return vmax_s32(x, y); +#else + return simd_make_int2(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y) { + return simd_make_int3(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y))); +} + +static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_s32(x, y); +#elif defined __SSE4_1__ + return (simd_int4) _mm_max_epi32((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y) { +#if defined __AVX2__ + return _mm256_max_epi32(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y) { +#if defined __AVX512F__ + return _mm512_max_epi32(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y) { +#if defined __arm__ || defined __arm64__ + return vmax_u32(x, y); +#else + return simd_make_uint2(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y) { + return simd_make_uint3(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); +} + +static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_u32(x, y); +#elif defined __SSE4_1__ + return (simd_uint4) _mm_max_epu32((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y) { +#if defined __AVX2__ + return _mm256_max_epu32(x, y); +#else + return 
simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y) { +#if defined __AVX512F__ + return _mm512_max_epu32(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC float simd_max(float x, float y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y) { +#if defined __AVX512VL__ + return _mm_max_epi64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y) { + return simd_make_long3(simd_max(simd_make_long4_undef(x), simd_make_long4_undef(y))); +} + +static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y) { +#if defined __AVX512VL__ + return _mm256_max_epi64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y) { +#if defined __AVX512F__ + return _mm512_max_epi64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y) { +#if defined __AVX512VL__ + return _mm_max_epu64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y) { + return simd_make_ulong3(simd_max(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y))); +} + +static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y) { +#if defined __AVX512VL__ + return _mm256_max_epu64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y) { +#if defined __AVX512F__ + return _mm512_max_epu64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC double simd_max(double x, double y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 
x, simd_char16 min, simd_char16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int4 
simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC float simd_clamp(float x, float min, float max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC double simd_clamp(double x, double min, double max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max) { + return simd_min(simd_max(x, min), max); +} + 
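+/* Editor's note: every simd_clamp overload in this header is the same
+ * composition, simd_min(simd_max(x, min), max), so if min > max the max
+ * bound wins. A minimal usage sketch (the values are illustrative and not
+ * part of this header):
+ *
+ *   simd_float4 v  = simd_make_float4(-2.0f, 0.5f, 3.0f, 1.0f);
+ *   simd_float4 lo = 0.0f;                  // scalar splats to all lanes
+ *   simd_float4 hi = 1.0f;
+ *   simd_float4 c  = simd_clamp(v, lo, hi); // yields (0.0, 0.5, 1.0, 1.0)
+ */
+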
+static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max) { + return simd_min(simd_max(x, min), max); +} + + +static inline SIMD_CFUNC float simd_sign(float x) { + return (x == 0 | x != x) ? 0 : copysign(1,x); +} + +static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC double simd_sign(double x) { + return (x == 0 | x != x) ? 0 : copysign(1,x); +} + +static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC float simd_mix(float x, float y, float t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC double simd_mix(double x, double y, double t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC float simd_recip(float x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif 
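+  /* Editor's note: each simd_recip overload in this run dispatches at
+   * compile time: under -ffast-math (__FAST_MATH__) it resolves to the
+   * estimate-based simd_fast_recip defined below, otherwise to the
+   * Newton-refined simd_precise_recip. */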
+} + +static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC double simd_recip(double x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC float simd_fast_recip(float x) { +#if defined __AVX512VL__ + simd_float4 x4 = simd_make_float4(x); + return ((simd_float4)_mm_rcp14_ss(x4, x4)).x; +#elif defined __SSE__ + return ((simd_float4)_mm_rcp_ss(simd_make_float4(x))).x; +#elif defined __ARM_NEON__ + return simd_fast_recip(simd_make_float2_undef(x)).x; +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x) { +#if defined __SSE__ + return simd_make_float2(simd_fast_recip(simd_make_float4_undef(x))); +#elif defined __ARM_NEON__ + simd_float2 r = vrecpe_f32(x); + return r * vrecps_f32(x, r); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x) { + return simd_make_float3(simd_fast_recip(simd_make_float4_undef(x))); +} + +static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x) { +#if defined __AVX512VL__ + return _mm_rcp14_ps(x); +#elif defined __SSE__ + return _mm_rcp_ps(x); +#elif defined __ARM_NEON__ + simd_float4 r = vrecpeq_f32(x); + return r * vrecpsq_f32(x, r); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x) { +#if defined __AVX512VL__ + return _mm256_rcp14_ps(x); +#elif defined __AVX__ + return _mm256_rcp_ps(x); +#else + return simd_make_float8(simd_fast_recip(x.lo), simd_fast_recip(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x) { +#if defined __AVX512F__ + return _mm512_rcp14_ps(x); +#else + return simd_make_float16(simd_fast_recip(x.lo), simd_fast_recip(x.hi)); +#endif +} + +static inline SIMD_CFUNC double simd_fast_recip(double x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC float 
simd_precise_recip(float x) { +#if defined __SSE__ + float r = simd_fast_recip(x); + return r*(2 - (x == 0 ? -INFINITY : x)*r); +#elif defined __ARM_NEON__ + return simd_precise_recip(simd_make_float2_undef(x)).x; +#else + return 1/x; +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x) { +#if defined __SSE__ + return simd_make_float2(simd_precise_recip(simd_make_float4_undef(x))); +#elif defined __ARM_NEON__ + simd_float2 r = simd_fast_recip(x); + return r*vrecps_f32(x, r); +#else + return 1/x; +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x) { + return simd_make_float3(simd_precise_recip(simd_make_float4_undef(x))); +} + +static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x) { +#if defined __SSE__ + simd_float4 r = simd_fast_recip(x); + return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r); +#elif defined __ARM_NEON__ + simd_float4 r = simd_fast_recip(x); + return r*vrecpsq_f32(x, r); +#else + return 1/x; +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x) { +#if defined __AVX__ + simd_float8 r = simd_fast_recip(x); + return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r); +#else + return simd_make_float8(simd_precise_recip(x.lo), simd_precise_recip(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x) { +#if defined __AVX512F__ + simd_float16 r = simd_fast_recip(x); + return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r); +#else + return simd_make_float16(simd_precise_recip(x.lo), simd_precise_recip(x.hi)); +#endif +} + +static inline SIMD_CFUNC double simd_precise_recip(double x) { + return 1/x; +} + +static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x) { + return 1/x; +} + +static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x) { + return 1/x; +} + +static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x) { + return 1/x; +} + +static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x) { + return 1/x; +} + +static inline SIMD_CFUNC float simd_rsqrt(float x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC double simd_rsqrt(double x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static 
inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC float simd_fast_rsqrt(float x) { +#if defined __AVX512VL__ + simd_float4 x4 = simd_make_float4(x); + return ((simd_float4)_mm_rsqrt14_ss(x4, x4)).x; +#elif defined __SSE__ + return ((simd_float4)_mm_rsqrt_ss(simd_make_float4(x))).x; +#elif defined __ARM_NEON__ + return simd_fast_rsqrt(simd_make_float2_undef(x)).x; +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x) { +#if defined __SSE__ + return simd_make_float2(simd_fast_rsqrt(simd_make_float4_undef(x))); +#elif defined __ARM_NEON__ + simd_float2 r = vrsqrte_f32(x); + return r * vrsqrts_f32(x, r*r); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x) { + return simd_make_float3(simd_fast_rsqrt(simd_make_float4_undef(x))); +} + +static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x) { +#if defined __AVX512VL__ + return _mm_rsqrt14_ps(x); +#elif defined __SSE__ + return _mm_rsqrt_ps(x); +#elif defined __ARM_NEON__ + simd_float4 r = vrsqrteq_f32(x); + return r * vrsqrtsq_f32(x, r*r); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x) { +#if defined __AVX512VL__ + return _mm256_rsqrt14_ps(x); +#elif defined __AVX__ + return _mm256_rsqrt_ps(x); +#else + return simd_make_float8(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x) { +#if defined __AVX512F__ + return _mm512_rsqrt14_ps(x); +#else + return simd_make_float16(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi)); +#endif +} + +static inline SIMD_CFUNC double simd_fast_rsqrt(double x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC float simd_precise_rsqrt(float x) { +#if defined __SSE__ + float r = simd_fast_rsqrt(x); + return r*(1.5f - 0.5f*(r == INFINITY ? 
-INFINITY : x)*r*r); +#elif defined __ARM_NEON__ + return simd_precise_rsqrt(simd_make_float2_undef(x)).x; +#else + return 1/sqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x) { +#if defined __SSE__ + return simd_make_float2(simd_precise_rsqrt(simd_make_float4_undef(x))); +#elif defined __ARM_NEON__ + simd_float2 r = simd_fast_rsqrt(x); + return r*vrsqrts_f32(x, r*r); +#else + return 1/__tg_sqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x) { + return simd_make_float3(simd_precise_rsqrt(simd_make_float4_undef(x))); +} + +static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x) { +#if defined __SSE__ + simd_float4 r = simd_fast_rsqrt(x); + return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r); +#elif defined __ARM_NEON__ + simd_float4 r = simd_fast_rsqrt(x); + return r*vrsqrtsq_f32(x, r*r); +#else + return 1/__tg_sqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x) { +#if defined __AVX__ + simd_float8 r = simd_fast_rsqrt(x); + return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r); +#else + return simd_make_float8(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x) { +#if defined __AVX512F__ + simd_float16 r = simd_fast_rsqrt(x); + return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r); +#else + return simd_make_float16(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi)); +#endif +} + +static inline SIMD_CFUNC double simd_precise_rsqrt(double x) { + return 1/sqrt(x); +} + +static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x) { + return 1/__tg_sqrt(x); +} + +static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x) { + return 1/__tg_sqrt(x); +} + +static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x) { + return 1/__tg_sqrt(x); +} + +static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x) { + return 1/__tg_sqrt(x); +} + +static inline SIMD_CFUNC float simd_fract(float x) { + return fmin(x - floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC double simd_fract(double x) { + return fmin(x - floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC float simd_step(float edge, float x) { + return !(x < 
edge); +} + +static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x) { + return simd_bitselect((simd_float2)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x) { + return simd_bitselect((simd_float3)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x) { + return simd_bitselect((simd_float4)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x) { + return simd_bitselect((simd_float8)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x) { + return simd_bitselect((simd_float16)1, 0, x < edge); +} + +static inline SIMD_CFUNC double simd_step(double edge, double x) { + return !(x < edge); +} + +static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x) { + return simd_bitselect((simd_double2)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x) { + return simd_bitselect((simd_double3)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x) { + return simd_bitselect((simd_double4)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x) { + return simd_bitselect((simd_double8)1, 0, x < edge); +} + +static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x) { + float t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x) { + simd_float2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x) { + simd_float3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x) { + simd_float4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x) { + simd_float8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x) { + simd_float16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x) { + double t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x) { + simd_double2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x) { + simd_double3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x) { + simd_double4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x) { + simd_double8 t = simd_clamp((x - 
edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x) { + return x.x + 
x.y + x.z; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x) { + char t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x) { + unsigned char t = x.z < x.x ? x.z : x.x; + return x.y < t ? 
x.y : t; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x) { + short t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x) { + unsigned short t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x) { + int t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x) { + unsigned int t = x.z < x.x ? x.z : x.x; + return x.y < t ? 
x.y : t; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x) { + return fmin(x.x, x.y); +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x) { + return fmin(fmin(x.x, x.z), x.y); +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x) { + simd_long1 t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x) { + simd_ulong1 t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x) { + return fmin(x.x, x.y); +} + +static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x) { + return fmin(fmin(x.x, x.z), x.y); +} + +static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x) { + char t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x) { + unsigned char t = x.z > x.x ? x.z : x.x; + return x.y > t ? 
x.y : t; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x) { + short t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x) { + unsigned short t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x) { + int t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x) { + unsigned int t = x.z > x.x ? x.z : x.x; + return x.y > t ? 
x.y : t; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x) { + return fmax(x.x, x.y); +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x) { + return fmax(fmax(x.x, x.z), x.y); +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x) { + simd_long1 t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x) { + simd_ulong1 t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x) { + return fmax(x.x, x.y); +} + +static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x) { + return fmax(fmax(x.x, x.z), x.y); +} + +static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +#ifdef __cplusplus +} +#endif +#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif /* SIMD_COMMON_HEADER */
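All of the simd_reduce_min/simd_reduce_max overloads above share one shape: 2-lane vectors compare their two lanes directly (fmin/fmax for floating-point, ?: for integers), 3-lane vectors fold the odd lane in first, and every wider vector recurses on simd_min(x.lo, x.hi) or simd_max(x.lo, x.hi) until only two lanes remain. A minimal usage sketch, assuming an Apple SDK where <simd/simd.h> and the simd_make_* constructors are available (values are illustrative):

```c
#include <simd/simd.h>
#include <stdio.h>

int main(void) {
    simd_float4 v = simd_make_float4(3.0f, -1.0f, 4.0f, 1.5f);
    /* simd_reduce_min halves the vector with simd_min(v.lo, v.hi) until two
     * lanes remain, then finishes with fmin; simd_reduce_max is symmetric. */
    printf("min = %g, max = %g\n", simd_reduce_min(v), simd_reduce_max(v));

    /* Integer vectors reduce the same way, bottoming out in the ?: compares. */
    simd_int4 w = simd_make_int4(7, 2, 9, -3);
    printf("imin = %d\n", simd_reduce_min(w));   /* prints -3 */
    return 0;
}
```

The lo/hi halving means an N-lane reduction costs about log2(N) vector min/max operations plus one scalar comparison at the bottom.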
\ No newline at end of file diff --git a/lib/libc/include/aarch64-macos-gnu/simd/conversion.h b/lib/libc/include/aarch64-macos-gnu/simd/conversion.h new file mode 100644 index 0000000000..6379afde05 --- /dev/null +++ b/lib/libc/include/aarch64-macos-gnu/simd/conversion.h @@ -0,0 +1,1966 @@ +/* Copyright (c) 2014-2017 Apple, Inc. All rights reserved. + * + * The interfaces declared in this header provide conversions between vector + * types. The following functions are available: + * + * simd_char(x) simd_uchar(x) + * simd_short(x) simd_ushort(x) + * simd_int(x) simd_uint(x) + * simd_long(x) simd_ulong(x) + * simd_float(x) + * simd_double(x) + * + * Each of these functions converts x to a vector whose elements have the + * type named by the function, with the same number of elements as x. Unlike + * a vector cast, these functions convert the elements to the new element + * type. These conversions behave exactly as C scalar conversions, except + * that conversions from integer vector types to signed integer vector types + * are guaranteed to wrap modulo 2^N (where N is the number of bits in an + * element of the result type). + * + * For integer vector types, saturating conversions are also available: + * + * simd_char_sat(x) simd_uchar_sat(x) + * simd_short_sat(x) simd_ushort_sat(x) + * simd_int_sat(x) simd_uint_sat(x) + * simd_long_sat(x) simd_ulong_sat(x) + * + * These conversions clamp x to the representable range of the result type + * before converting. + * + * Unlike most vector operations in <simd/>, there are no abbreviated C++ + * names for these functions in the simd:: namespace. + */ + +#ifndef __SIMD_CONVERSION_HEADER__ +#define __SIMD_CONVERSION_HEADER__ + +#include <simd/base.h> +#if SIMD_COMPILER_HAS_REQUIRED_FEATURES +#include <simd/vector_types.h> +#include <simd/common.h> +#include <simd/logic.h> + +#ifdef __cplusplus +extern "C" { +#endif + +static simd_char2 SIMD_CFUNC simd_char(simd_char2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_char3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_char4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_char8 __x); +static simd_char16 SIMD_CFUNC simd_char(simd_char16 __x); +static simd_char32 SIMD_CFUNC simd_char(simd_char32 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_uchar2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_uchar3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_uchar4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_uchar8 __x); +static simd_char16 SIMD_CFUNC simd_char(simd_uchar16 __x); +static simd_char32 SIMD_CFUNC simd_char(simd_uchar32 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_short2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_short3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_short4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_short8 __x); +static simd_char16 SIMD_CFUNC simd_char(simd_short16 __x); +static simd_char32 SIMD_CFUNC simd_char(simd_short32 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_ushort2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_ushort3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_ushort4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_ushort8 __x); +static simd_char16 SIMD_CFUNC simd_char(simd_ushort16 __x); +static simd_char32 SIMD_CFUNC simd_char(simd_ushort32 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_int2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_int3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_int4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_int8 __x); +static simd_char16 SIMD_CFUNC 
simd_char(simd_int16 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_uint2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_uint3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_uint4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_uint8 __x); +static simd_char16 SIMD_CFUNC simd_char(simd_uint16 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_float2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_float3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_float4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_float8 __x); +static simd_char16 SIMD_CFUNC simd_char(simd_float16 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_long2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_long3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_long4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_long8 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_ulong2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_ulong3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_ulong4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_ulong8 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_double2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_double3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_double4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_double8 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_char2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_char3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_char4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_char8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_char16 __x); +static simd_char32 SIMD_CFUNC simd_char_sat(simd_char32 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_short2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_short3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_short4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_short8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_short16 __x); +static simd_char32 SIMD_CFUNC simd_char_sat(simd_short32 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_int2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_int3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_int4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_int8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_int16 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_float2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_float3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_float4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_float8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_float16 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_long2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_long3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_long4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_long8 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_double2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_double3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_double4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_double8 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_uchar2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_uchar3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_uchar4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_uchar8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_uchar16 __x); +static simd_char32 SIMD_CFUNC simd_char_sat(simd_uchar32 __x); +static simd_char2 
SIMD_CFUNC simd_char_sat(simd_ushort2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_ushort3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_ushort4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_ushort8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_ushort16 __x); +static simd_char32 SIMD_CFUNC simd_char_sat(simd_ushort32 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_uint2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_uint3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_uint4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_uint8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_uint16 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_ulong2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_ulong3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_ulong4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_ulong8 __x); +#define vector_char simd_char +#define vector_char_sat simd_char_sat + +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_char2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_char3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_char4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_char8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_char16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_char32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_uchar2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_uchar3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_uchar4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_uchar8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_uchar16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_uchar32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_short2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_short3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_short4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_short8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_short16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_short32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_ushort2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_ushort3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_ushort4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_ushort8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_ushort16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_ushort32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_int2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_int3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_int4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_int8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_int16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_uint2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_uint3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_uint4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_uint8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_uint16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_float2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_float3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_float4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_float8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_float16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_long2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_long3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_long4 __x); +static simd_uchar8 SIMD_CFUNC 
simd_uchar(simd_long8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_ulong2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_ulong3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_ulong4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_ulong8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_double2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_double3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_double4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_double8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_char2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_char3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_char4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_char8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_char16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_char32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_short2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_short3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_short4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_short8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_short16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_short32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_int2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_int3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_int4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_int8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_int16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_float2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_float3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_float4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_float8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_float16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_long2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_long3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_long4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_long8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_double2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_double3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_double4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_double8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_uchar2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_uchar3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_uchar4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_uchar8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_uchar16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_uchar32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_ushort2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_ushort3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_ushort4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_ushort8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_ushort16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_ushort32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_uint2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_uint3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_uint4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_uint8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_uint16 __x); +static simd_uchar2 
SIMD_CFUNC simd_uchar_sat(simd_ulong2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_ulong3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_ulong4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_ulong8 __x); +#define vector_uchar simd_uchar +#define vector_uchar_sat simd_uchar_sat + +static simd_short2 SIMD_CFUNC simd_short(simd_char2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_char3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_char4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_char8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_char16 __x); +static simd_short32 SIMD_CFUNC simd_short(simd_char32 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_uchar2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_uchar3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_uchar4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_uchar8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_uchar16 __x); +static simd_short32 SIMD_CFUNC simd_short(simd_uchar32 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_short2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_short3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_short4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_short8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_short16 __x); +static simd_short32 SIMD_CFUNC simd_short(simd_short32 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_ushort2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_ushort3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_ushort4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_ushort8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_ushort16 __x); +static simd_short32 SIMD_CFUNC simd_short(simd_ushort32 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_int2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_int3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_int4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_int8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_int16 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_uint2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_uint3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_uint4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_uint8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_uint16 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_float2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_float3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_float4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_float8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_float16 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_long2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_long3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_long4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_long8 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_ulong2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_ulong3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_ulong4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_ulong8 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_double2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_double3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_double4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_double8 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_char2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_char3 __x); +static simd_short4 SIMD_CFUNC 
simd_short_sat(simd_char4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_char8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_char16 __x); +static simd_short32 SIMD_CFUNC simd_short_sat(simd_char32 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_short2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_short3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_short4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_short8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_short16 __x); +static simd_short32 SIMD_CFUNC simd_short_sat(simd_short32 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_int2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_int3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_int4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_int8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_int16 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_float2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_float3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_float4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_float8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_float16 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_long2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_long3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_long4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_long8 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_double2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_double3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_double4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_double8 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_uchar2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_uchar3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_uchar4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_uchar8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_uchar16 __x); +static simd_short32 SIMD_CFUNC simd_short_sat(simd_uchar32 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_ushort2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_ushort3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_ushort4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_ushort8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_ushort16 __x); +static simd_short32 SIMD_CFUNC simd_short_sat(simd_ushort32 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_uint2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_uint3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_uint4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_uint8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_uint16 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_ulong2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_ulong3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_ulong4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_ulong8 __x); +#define vector_short simd_short +#define vector_short_sat simd_short_sat + +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_char2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_char3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_char4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_char8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_char16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_char32 
__x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_uchar2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_uchar3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_uchar4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_uchar8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_uchar16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_uchar32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_short2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_short3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_short4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_short8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_short16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_short32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_ushort2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_ushort3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_ushort4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_ushort8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_ushort16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_ushort32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_int2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_int3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_int4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_int8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_int16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_uint2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_uint3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_uint4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_uint8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_uint16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_float2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_float3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_float4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_float8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_float16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_long2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_long3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_long4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_long8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_ulong2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_ulong3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_ulong4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_ulong8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_double2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_double3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_double4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_double8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_char2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_char3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_char4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_char8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_char16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_char32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_short2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_short3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_short4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_short8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_short16 __x); +static simd_ushort32 
SIMD_CFUNC simd_ushort_sat(simd_short32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_int2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_int3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_int4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_int8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_int16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_float2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_float3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_float4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_float8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_float16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_long2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_long3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_long4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_long8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_double2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_double3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_double4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_double8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_uchar2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_uchar3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_uchar4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_uchar8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_uchar16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_uchar32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_ushort2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_ushort3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_ushort4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_ushort8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_ushort16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_ushort32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_uint2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_uint3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_uint4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_uint8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_uint16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_ulong2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_ulong3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_ulong4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_ulong8 __x); +#define vector_ushort simd_ushort +#define vector_ushort_sat simd_ushort_sat + +static simd_int2 SIMD_CFUNC simd_int(simd_char2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_char3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_char4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_char8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_char16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_uchar2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_uchar3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_uchar4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_uchar8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_uchar16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_short2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_short3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_short4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_short8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_short16 __x); +static 
simd_int2 SIMD_CFUNC simd_int(simd_ushort2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_ushort3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_ushort4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_ushort8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_ushort16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_int2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_int3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_int4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_int8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_int16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_uint2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_uint3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_uint4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_uint8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_uint16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_float2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_float3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_float4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_float8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_float16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_long2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_long3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_long4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_long8 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_ulong2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_ulong3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_ulong4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_ulong8 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_double2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_double3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_double4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_double8 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_char2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_char3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_char4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_char8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_char16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_short2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_short3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_short4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_short8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_short16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_int2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_int3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_int4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_int8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_int16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_float2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_float3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_float4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_float8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_float16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_long2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_long3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_long4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_long8 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_double2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_double3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_double4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_double8 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_uchar2 __x); +static simd_int3 SIMD_CFUNC 
simd_int_sat(simd_uchar3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_uchar4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_uchar8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_uchar16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_ushort2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_ushort3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_ushort4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_ushort8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_ushort16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_uint2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_uint3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_uint4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_uint8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_uint16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_ulong2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_ulong3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_ulong4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_ulong8 __x); +static simd_int2 SIMD_CFUNC simd_int_rte(simd_float2 __x); +static simd_int3 SIMD_CFUNC simd_int_rte(simd_float3 __x); +static simd_int4 SIMD_CFUNC simd_int_rte(simd_float4 __x); +static simd_int8 SIMD_CFUNC simd_int_rte(simd_float8 __x); +static simd_int16 SIMD_CFUNC simd_int_rte(simd_float16 __x); +#define vector_int simd_int +#define vector_int_sat simd_int_sat + +static simd_uint2 SIMD_CFUNC simd_uint(simd_char2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_char3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_char4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_char8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_char16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_uchar2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_uchar3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_uchar4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_uchar8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_uchar16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_short2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_short3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_short4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_short8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_short16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_ushort2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_ushort3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_ushort4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_ushort8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_ushort16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_int2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_int3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_int4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_int8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_int16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_uint2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_uint3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_uint4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_uint8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_uint16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_float2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_float3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_float4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_float8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_float16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_long2 __x); +static simd_uint3 SIMD_CFUNC 
simd_uint(simd_long3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_long4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_long8 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_ulong2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_ulong3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_ulong4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_ulong8 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_double2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_double3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_double4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_double8 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_char2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_char3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_char4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_char8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_char16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_short2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_short3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_short4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_short8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_short16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_int2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_int3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_int4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_int8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_int16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_float2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_float3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_float4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_float8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_float16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_long2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_long3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_long4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_long8 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_double2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_double3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_double4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_double8 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_uchar2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_uchar3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_uchar4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_uchar8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_uchar16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_ushort2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_ushort3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_ushort4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_ushort8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_ushort16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_uint2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_uint3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_uint4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_uint8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_uint16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_ulong2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_ulong3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_ulong4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_ulong8 __x); +#define vector_uint simd_uint 
+#define vector_uint_sat simd_uint_sat + +static simd_float2 SIMD_CFUNC simd_float(simd_char2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_char3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_char4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_char8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_char16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_uchar2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_uchar3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_uchar4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_uchar8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_uchar16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_short2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_short3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_short4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_short8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_short16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_ushort2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_ushort3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_ushort4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_ushort8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_ushort16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_int2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_int3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_int4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_int8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_int16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_uint2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_uint3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_uint4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_uint8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_uint16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_float2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_float3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_float4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_float8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_float16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_long2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_long3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_long4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_long8 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_ulong2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_ulong3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_ulong4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_ulong8 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_double2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_double3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_double4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_double8 __x); +#define vector_float simd_float + +static simd_long2 SIMD_CFUNC simd_long(simd_char2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_char3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_char4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_char8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_uchar2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_uchar3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_uchar4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_uchar8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_short2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_short3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_short4 __x); +static simd_long8 
SIMD_CFUNC simd_long(simd_short8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_ushort2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_ushort3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_ushort4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_ushort8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_int2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_int3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_int4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_int8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_uint2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_uint3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_uint4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_uint8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_float2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_float3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_float4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_float8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_long2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_long3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_long4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_long8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_ulong2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_ulong3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_ulong4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_ulong8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_double2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_double3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_double4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_double8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_char2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_char3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_char4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_char8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_short2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_short3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_short4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_short8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_int2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_int3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_int4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_int8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_float2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_float3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_float4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_float8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_long2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_long3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_long4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_long8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_double2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_double3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_double4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_double8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_uchar2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_uchar3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_uchar4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_uchar8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_ushort2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_ushort3 __x); +static simd_long4 SIMD_CFUNC 
simd_long_sat(simd_ushort4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_ushort8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_uint2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_uint3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_uint4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_uint8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_ulong2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_ulong3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_ulong4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_ulong8 __x); +static simd_long2 SIMD_CFUNC simd_long_rte(simd_double2 __x); +static simd_long3 SIMD_CFUNC simd_long_rte(simd_double3 __x); +static simd_long4 SIMD_CFUNC simd_long_rte(simd_double4 __x); +static simd_long8 SIMD_CFUNC simd_long_rte(simd_double8 __x); +#define vector_long simd_long +#define vector_long_sat simd_long_sat + +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_char2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_char3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_char4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_char8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_uchar2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_uchar3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_uchar4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_uchar8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_short2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_short3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_short4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_short8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_ushort2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_ushort3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_ushort4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_ushort8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_int2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_int3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_int4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_int8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_uint2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_uint3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_uint4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_uint8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_float2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_float3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_float4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_float8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_long2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_long3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_long4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_long8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_ulong2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_ulong3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_ulong4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_ulong8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_double2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_double3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_double4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_double8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_char2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_char3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_char4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_char8 __x); 
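As the comment at the top of conversion.h states, simd_TYPE(x) converts elementwise like a C scalar conversion (with out-of-range integer results guaranteed to wrap modulo 2^N), while simd_TYPE_sat(x) clamps to the destination's representable range before converting. A small sketch contrasting the two for a char destination, again assuming <simd/simd.h> is available (values are illustrative):

```c
#include <simd/simd.h>
#include <stdio.h>

int main(void) {
    simd_int2 big = simd_make_int2(300, -300);  /* outside char's [-128, 127] */
    simd_char2 wrapped = simd_char(big);        /* wraps modulo 2^8:  44, -44 */
    simd_char2 clamped = simd_char_sat(big);    /* clamps:           127, -128 */
    printf("wrap: %d %d\n", wrapped.x, wrapped.y);
    printf("sat:  %d %d\n", clamped.x, clamped.y);
    return 0;
}
```

The saturating forms are generally the safer choice when narrowing, e.g. when packing intermediate simd_int arithmetic back into a smaller element type.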
+static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_short2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_short3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_short4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_short8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_int2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_int3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_int4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_int8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_float2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_float3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_float4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_float8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_long2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_long3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_long4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_long8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_double2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_double3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_double4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_double8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_uchar2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_uchar3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_uchar4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_uchar8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_ushort2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_ushort3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_ushort4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_ushort8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_uint2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_uint3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_uint4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_uint8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_ulong2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_ulong3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_ulong4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_ulong8 __x); +#define vector_ulong simd_ulong +#define vector_ulong_sat simd_ulong_sat + +static simd_double2 SIMD_CFUNC simd_double(simd_char2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_char3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_char4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_char8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_uchar2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_uchar3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_uchar4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_uchar8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_short2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_short3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_short4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_short8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_ushort2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_ushort3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_ushort4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_ushort8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_int2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_int3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_int4 __x); +static 
simd_double8 SIMD_CFUNC simd_double(simd_int8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_uint2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_uint3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_uint4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_uint8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_float2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_float3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_float4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_float8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_long2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_long3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_long4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_long8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_ulong2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_ulong3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_ulong4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_ulong8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_double2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_double3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_double4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_double8 __x); +#define vector_double simd_double + +static simd_char2 SIMD_CFUNC vector2(char __x, char __y) { return ( simd_char2){__x, __y}; } +static simd_uchar2 SIMD_CFUNC vector2(unsigned char __x, unsigned char __y) { return ( simd_uchar2){__x, __y}; } +static simd_short2 SIMD_CFUNC vector2(short __x, short __y) { return ( simd_short2){__x, __y}; } +static simd_ushort2 SIMD_CFUNC vector2(unsigned short __x, unsigned short __y) { return (simd_ushort2){__x, __y}; } +static simd_int2 SIMD_CFUNC vector2(int __x, int __y) { return ( simd_int2){__x, __y}; } +static simd_uint2 SIMD_CFUNC vector2(unsigned int __x, unsigned int __y) { return ( simd_uint2){__x, __y}; } +static simd_float2 SIMD_CFUNC vector2(float __x, float __y) { return ( simd_float2){__x, __y}; } +static simd_long2 SIMD_CFUNC vector2(simd_long1 __x, simd_long1 __y) { return ( simd_long2){__x, __y}; } +static simd_ulong2 SIMD_CFUNC vector2(simd_ulong1 __x, simd_ulong1 __y) { return ( simd_ulong2){__x, __y}; } +static simd_double2 SIMD_CFUNC vector2(double __x, double __y) { return (simd_double2){__x, __y}; } + +static simd_char3 SIMD_CFUNC vector3(char __x, char __y, char __z) { return ( simd_char3){__x, __y, __z}; } +static simd_uchar3 SIMD_CFUNC vector3(unsigned char __x, unsigned char __y, unsigned char __z) { return ( simd_uchar3){__x, __y, __z}; } +static simd_short3 SIMD_CFUNC vector3(short __x, short __y, short __z) { return ( simd_short3){__x, __y, __z}; } +static simd_ushort3 SIMD_CFUNC vector3(unsigned short __x, unsigned short __y, unsigned short __z) { return (simd_ushort3){__x, __y, __z}; } +static simd_int3 SIMD_CFUNC vector3(int __x, int __y, int __z) { return ( simd_int3){__x, __y, __z}; } +static simd_uint3 SIMD_CFUNC vector3(unsigned int __x, unsigned int __y, unsigned int __z) { return ( simd_uint3){__x, __y, __z}; } +static simd_float3 SIMD_CFUNC vector3(float __x, float __y, float __z) { return ( simd_float3){__x, __y, __z}; } +static simd_long3 SIMD_CFUNC vector3(simd_long1 __x, simd_long1 __y, simd_long1 __z) { return ( simd_long3){__x, __y, __z}; } +static simd_ulong3 SIMD_CFUNC vector3(simd_ulong1 __x, simd_ulong1 __y, simd_ulong1 __z) { return ( simd_ulong3){__x, __y, __z}; } +static simd_double3 SIMD_CFUNC vector3(double __x, double __y, double __z) 
{ return (simd_double3){__x, __y, __z}; } + +static simd_char3 SIMD_CFUNC vector3(simd_char2 __xy, char __z) { simd_char3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_uchar3 SIMD_CFUNC vector3(simd_uchar2 __xy, unsigned char __z) { simd_uchar3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_short3 SIMD_CFUNC vector3(simd_short2 __xy, short __z) { simd_short3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_ushort3 SIMD_CFUNC vector3(simd_ushort2 __xy, unsigned short __z) { simd_ushort3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_int3 SIMD_CFUNC vector3(simd_int2 __xy, int __z) { simd_int3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_uint3 SIMD_CFUNC vector3(simd_uint2 __xy, unsigned int __z) { simd_uint3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_float3 SIMD_CFUNC vector3(simd_float2 __xy, float __z) { simd_float3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_long3 SIMD_CFUNC vector3(simd_long2 __xy, simd_long1 __z) { simd_long3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_ulong3 SIMD_CFUNC vector3(simd_ulong2 __xy, simd_ulong1 __z) { simd_ulong3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_double3 SIMD_CFUNC vector3(simd_double2 __xy, double __z) { simd_double3 __r; __r.xy = __xy; __r.z = __z; return __r; } + +static simd_char4 SIMD_CFUNC vector4(char __x, char __y, char __z, char __w) { return ( simd_char4){__x, __y, __z, __w}; } +static simd_uchar4 SIMD_CFUNC vector4(unsigned char __x, unsigned char __y, unsigned char __z, unsigned char __w) { return ( simd_uchar4){__x, __y, __z, __w}; } +static simd_short4 SIMD_CFUNC vector4(short __x, short __y, short __z, short __w) { return ( simd_short4){__x, __y, __z, __w}; } +static simd_ushort4 SIMD_CFUNC vector4(unsigned short __x, unsigned short __y, unsigned short __z, unsigned short __w) { return (simd_ushort4){__x, __y, __z, __w}; } +static simd_int4 SIMD_CFUNC vector4(int __x, int __y, int __z, int __w) { return ( simd_int4){__x, __y, __z, __w}; } +static simd_uint4 SIMD_CFUNC vector4(unsigned int __x, unsigned int __y, unsigned int __z, unsigned int __w) { return ( simd_uint4){__x, __y, __z, __w}; } +static simd_float4 SIMD_CFUNC vector4(float __x, float __y, float __z, float __w) { return ( simd_float4){__x, __y, __z, __w}; } +static simd_long4 SIMD_CFUNC vector4(simd_long1 __x, simd_long1 __y, simd_long1 __z, simd_long1 __w) { return ( simd_long4){__x, __y, __z, __w}; } +static simd_ulong4 SIMD_CFUNC vector4(simd_ulong1 __x, simd_ulong1 __y, simd_ulong1 __z, simd_ulong1 __w) { return ( simd_ulong4){__x, __y, __z, __w}; } +static simd_double4 SIMD_CFUNC vector4(double __x, double __y, double __z, double __w) { return (simd_double4){__x, __y, __z, __w}; } + +static simd_char4 SIMD_CFUNC vector4(simd_char2 __xy, simd_char2 __zw) { simd_char4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_uchar4 SIMD_CFUNC vector4(simd_uchar2 __xy, simd_uchar2 __zw) { simd_uchar4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_short4 SIMD_CFUNC vector4(simd_short2 __xy, simd_short2 __zw) { simd_short4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_ushort4 SIMD_CFUNC vector4(simd_ushort2 __xy, simd_ushort2 __zw) { simd_ushort4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_int4 SIMD_CFUNC vector4(simd_int2 __xy, simd_int2 __zw) { simd_int4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_uint4 SIMD_CFUNC vector4(simd_uint2 __xy, simd_uint2 __zw) { simd_uint4 
__r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_float4 SIMD_CFUNC vector4(simd_float2 __xy, simd_float2 __zw) { simd_float4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_long4 SIMD_CFUNC vector4(simd_long2 __xy, simd_long2 __zw) { simd_long4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_ulong4 SIMD_CFUNC vector4(simd_ulong2 __xy, simd_ulong2 __zw) { simd_ulong4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_double4 SIMD_CFUNC vector4(simd_double2 __xy, simd_double2 __zw) { simd_double4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } + +static simd_char4 SIMD_CFUNC vector4(simd_char3 __xyz, char __w) { simd_char4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_uchar4 SIMD_CFUNC vector4(simd_uchar3 __xyz, unsigned char __w) { simd_uchar4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_short4 SIMD_CFUNC vector4(simd_short3 __xyz, short __w) { simd_short4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_ushort4 SIMD_CFUNC vector4(simd_ushort3 __xyz, unsigned short __w) { simd_ushort4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_int4 SIMD_CFUNC vector4(simd_int3 __xyz, int __w) { simd_int4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_uint4 SIMD_CFUNC vector4(simd_uint3 __xyz, unsigned int __w) { simd_uint4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_float4 SIMD_CFUNC vector4(simd_float3 __xyz, float __w) { simd_float4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_long4 SIMD_CFUNC vector4(simd_long3 __xyz, simd_long1 __w) { simd_long4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_ulong4 SIMD_CFUNC vector4(simd_ulong3 __xyz, simd_ulong1 __w) { simd_ulong4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_double4 SIMD_CFUNC vector4(simd_double3 __xyz, double __w) { simd_double4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } + +static simd_char8 SIMD_CFUNC vector8(simd_char4 __lo, simd_char4 __hi) { simd_char8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uchar8 SIMD_CFUNC vector8(simd_uchar4 __lo, simd_uchar4 __hi) { simd_uchar8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_short8 SIMD_CFUNC vector8(simd_short4 __lo, simd_short4 __hi) { simd_short8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_ushort8 SIMD_CFUNC vector8(simd_ushort4 __lo, simd_ushort4 __hi) { simd_ushort8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_int8 SIMD_CFUNC vector8(simd_int4 __lo, simd_int4 __hi) { simd_int8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uint8 SIMD_CFUNC vector8(simd_uint4 __lo, simd_uint4 __hi) { simd_uint8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_float8 SIMD_CFUNC vector8(simd_float4 __lo, simd_float4 __hi) { simd_float8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_long8 SIMD_CFUNC vector8(simd_long4 __lo, simd_long4 __hi) { simd_long8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_ulong8 SIMD_CFUNC vector8(simd_ulong4 __lo, simd_ulong4 __hi) { simd_ulong8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_double8 SIMD_CFUNC vector8(simd_double4 __lo, simd_double4 __hi) { simd_double8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } + +static simd_char16 SIMD_CFUNC vector16(simd_char8 __lo, simd_char8 __hi) { simd_char16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uchar16 SIMD_CFUNC vector16(simd_uchar8 __lo, simd_uchar8 __hi) { simd_uchar16 __r; 
__r.lo = __lo; __r.hi = __hi; return __r; } +static simd_short16 SIMD_CFUNC vector16(simd_short8 __lo, simd_short8 __hi) { simd_short16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_ushort16 SIMD_CFUNC vector16(simd_ushort8 __lo, simd_ushort8 __hi) { simd_ushort16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_int16 SIMD_CFUNC vector16(simd_int8 __lo, simd_int8 __hi) { simd_int16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uint16 SIMD_CFUNC vector16(simd_uint8 __lo, simd_uint8 __hi) { simd_uint16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_float16 SIMD_CFUNC vector16(simd_float8 __lo, simd_float8 __hi) { simd_float16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } + +static simd_char32 SIMD_CFUNC vector32(simd_char16 __lo, simd_char16 __hi) { simd_char32 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uchar32 SIMD_CFUNC vector32(simd_uchar16 __lo, simd_uchar16 __hi) { simd_uchar32 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_short32 SIMD_CFUNC vector32(simd_short16 __lo, simd_short16 __hi) { simd_short32 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_ushort32 SIMD_CFUNC vector32(simd_ushort16 __lo, simd_ushort16 __hi) { simd_ushort32 __r; __r.lo = __lo; __r.hi = __hi; return __r; } + +#pragma mark - Implementation + +static simd_char2 SIMD_CFUNC simd_char(simd_char2 __x) { return __x; } +static simd_char3 SIMD_CFUNC simd_char(simd_char3 __x) { return __x; } +static simd_char4 SIMD_CFUNC simd_char(simd_char4 __x) { return __x; } +static simd_char8 SIMD_CFUNC simd_char(simd_char8 __x) { return __x; } +static simd_char16 SIMD_CFUNC simd_char(simd_char16 __x) { return __x; } +static simd_char32 SIMD_CFUNC simd_char(simd_char32 __x) { return __x; } +static simd_char2 SIMD_CFUNC simd_char(simd_uchar2 __x) { return (simd_char2)__x; } +static simd_char3 SIMD_CFUNC simd_char(simd_uchar3 __x) { return (simd_char3)__x; } +static simd_char4 SIMD_CFUNC simd_char(simd_uchar4 __x) { return (simd_char4)__x; } +static simd_char8 SIMD_CFUNC simd_char(simd_uchar8 __x) { return (simd_char8)__x; } +static simd_char16 SIMD_CFUNC simd_char(simd_uchar16 __x) { return (simd_char16)__x; } +static simd_char32 SIMD_CFUNC simd_char(simd_uchar32 __x) { return (simd_char32)__x; } +static simd_char2 SIMD_CFUNC simd_char(simd_short2 __x) { return __builtin_convertvector(__x & 0xff, simd_char2); } +static simd_char3 SIMD_CFUNC simd_char(simd_short3 __x) { return __builtin_convertvector(__x & 0xff, simd_char3); } +static simd_char4 SIMD_CFUNC simd_char(simd_short4 __x) { return __builtin_convertvector(__x & 0xff, simd_char4); } +static simd_char8 SIMD_CFUNC simd_char(simd_short8 __x) { return __builtin_convertvector(__x & 0xff, simd_char8); } +static simd_char16 SIMD_CFUNC simd_char(simd_short16 __x) { return __builtin_convertvector(__x & 0xff, simd_char16); } +static simd_char32 SIMD_CFUNC simd_char(simd_short32 __x) { return __builtin_convertvector(__x & 0xff, simd_char32); } +static simd_char2 SIMD_CFUNC simd_char(simd_ushort2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_ushort3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_ushort4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_ushort8 __x) { return simd_char(simd_short(__x)); } +static simd_char16 SIMD_CFUNC simd_char(simd_ushort16 __x) { return simd_char(simd_short(__x)); } +static simd_char32 SIMD_CFUNC simd_char(simd_ushort32 
__x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_int2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_int3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_int4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_int8 __x) { return simd_char(simd_short(__x)); } +static simd_char16 SIMD_CFUNC simd_char(simd_int16 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_uint2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_uint3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_uint4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_uint8 __x) { return simd_char(simd_short(__x)); } +static simd_char16 SIMD_CFUNC simd_char(simd_uint16 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_float2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_float3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_float4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_float8 __x) { return simd_char(simd_short(__x)); } +static simd_char16 SIMD_CFUNC simd_char(simd_float16 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_long2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_long3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_long4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_long8 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_ulong2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_ulong3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_ulong4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_ulong8 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_double2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_double3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_double4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_double8 __x) { return simd_char(simd_short(__x)); } + +static simd_char2 SIMD_CFUNC simd_char_sat(simd_char2 __x) { return __x; } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_char3 __x) { return __x; } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_char4 __x) { return __x; } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_char8 __x) { return __x; } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_char16 __x) { return __x; } +static simd_char32 SIMD_CFUNC simd_char_sat(simd_char32 __x) { return __x; } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_short2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_short3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_short4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_short8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char16 
SIMD_CFUNC simd_char_sat(simd_short16 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char32 SIMD_CFUNC simd_char_sat(simd_short32 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_int2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_int3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_int4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_int8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_int16 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_float2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_float3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_float4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_float8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_float16 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_long2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_long3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_long4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_long8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_double2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_double3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_double4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_double8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_uchar2 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_uchar3 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_uchar4 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_uchar8 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_uchar16 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char32 SIMD_CFUNC simd_char_sat(simd_uchar32 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_ushort2 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_ushort3 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_ushort4 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_ushort8 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_ushort16 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char32 SIMD_CFUNC simd_char_sat(simd_ushort32 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_uint2 __x) { return simd_char(simd_min(__x,0x7f)); } 
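+/* Annotation, not in the upstream header: the clamp bounds above give the
+ * usual saturating narrowing, e.g.
+ *
+ *   simd_char_sat((simd_int2){ 300, -300 })  ==  (simd_char2){ 127, -128 }
+ *
+ * whereas the unsaturated simd_char keeps only the low 8 bits of each lane,
+ * so simd_char((simd_int2){ 300, -300 })  ==  (simd_char2){ 44, -44 }.
+ */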
+static simd_char3 SIMD_CFUNC simd_char_sat(simd_uint3 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_uint4 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_uint8 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_uint16 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_ulong2 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_ulong3 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_ulong4 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_ulong8 __x) { return simd_char(simd_min(__x,0x7f)); } + + +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_char2 __x) { return (simd_uchar2)__x; } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_char3 __x) { return (simd_uchar3)__x; } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_char4 __x) { return (simd_uchar4)__x; } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_char8 __x) { return (simd_uchar8)__x; } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_char16 __x) { return (simd_uchar16)__x; } +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_char32 __x) { return (simd_uchar32)__x; } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_uchar2 __x) { return __x; } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_uchar3 __x) { return __x; } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_uchar4 __x) { return __x; } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_uchar8 __x) { return __x; } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_uchar16 __x) { return __x; } +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_uchar32 __x) { return __x; } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_short2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_short3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_short4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_short8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_short16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_short32 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_ushort2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_ushort3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_ushort4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_ushort8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_ushort16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_ushort32 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_int2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_int3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_int4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_int8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_int16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_uint2 __x) { return 
simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_uint3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_uint4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_uint8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_uint16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_float2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_float3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_float4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_float8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_float16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_long2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_long3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_long4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_long8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_ulong2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_ulong3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_ulong4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_ulong8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_double2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_double3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_double4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_double8 __x) { return simd_uchar(simd_char(__x)); } + +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_char2 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_char3 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_char4 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_char8 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_char16 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_char32 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_short2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_short3 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_short4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_short8 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_short16 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_short32 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_int2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_int3 __x) { return 
simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_int4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_int8 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_int16 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_float2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_float3 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_float4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_float8 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_float16 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_long2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_long3 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_long4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_long8 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_double2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_double3 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_double4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_double8 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_uchar2 __x) { return __x; } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_uchar3 __x) { return __x; } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_uchar4 __x) { return __x; } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_uchar8 __x) { return __x; } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_uchar16 __x) { return __x; } +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_uchar32 __x) { return __x; } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_ushort2 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_ushort3 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_ushort4 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_ushort8 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_ushort16 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_ushort32 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_uint2 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_uint3 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_uint4 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_uint8 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_uint16 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_ulong2 __x) { return simd_uchar(simd_min(__x,0xff)); } 
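+/* Annotation, not in the upstream header: for an unsigned destination the
+ * clamp range is [0, 0xff], e.g.
+ *
+ *   simd_uchar_sat((simd_short2){ 300, -5 })  ==  (simd_uchar2){ 255, 0 }
+ */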
+static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_ulong3 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_ulong4 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_ulong8 __x) { return simd_uchar(simd_min(__x,0xff)); } + + +static simd_short2 SIMD_CFUNC simd_short(simd_char2 __x) { return __builtin_convertvector(__x, simd_short2); } +static simd_short3 SIMD_CFUNC simd_short(simd_char3 __x) { return __builtin_convertvector(__x, simd_short3); } +static simd_short4 SIMD_CFUNC simd_short(simd_char4 __x) { return __builtin_convertvector(__x, simd_short4); } +static simd_short8 SIMD_CFUNC simd_short(simd_char8 __x) { return __builtin_convertvector(__x, simd_short8); } +static simd_short16 SIMD_CFUNC simd_short(simd_char16 __x) { return __builtin_convertvector(__x, simd_short16); } +static simd_short32 SIMD_CFUNC simd_short(simd_char32 __x) { return __builtin_convertvector(__x, simd_short32); } +static simd_short2 SIMD_CFUNC simd_short(simd_uchar2 __x) { return __builtin_convertvector(__x, simd_short2); } +static simd_short3 SIMD_CFUNC simd_short(simd_uchar3 __x) { return __builtin_convertvector(__x, simd_short3); } +static simd_short4 SIMD_CFUNC simd_short(simd_uchar4 __x) { return __builtin_convertvector(__x, simd_short4); } +static simd_short8 SIMD_CFUNC simd_short(simd_uchar8 __x) { return __builtin_convertvector(__x, simd_short8); } +static simd_short16 SIMD_CFUNC simd_short(simd_uchar16 __x) { return __builtin_convertvector(__x, simd_short16); } +static simd_short32 SIMD_CFUNC simd_short(simd_uchar32 __x) { return __builtin_convertvector(__x, simd_short32); } +static simd_short2 SIMD_CFUNC simd_short(simd_short2 __x) { return __x; } +static simd_short3 SIMD_CFUNC simd_short(simd_short3 __x) { return __x; } +static simd_short4 SIMD_CFUNC simd_short(simd_short4 __x) { return __x; } +static simd_short8 SIMD_CFUNC simd_short(simd_short8 __x) { return __x; } +static simd_short16 SIMD_CFUNC simd_short(simd_short16 __x) { return __x; } +static simd_short32 SIMD_CFUNC simd_short(simd_short32 __x) { return __x; } +static simd_short2 SIMD_CFUNC simd_short(simd_ushort2 __x) { return (simd_short2)__x; } +static simd_short3 SIMD_CFUNC simd_short(simd_ushort3 __x) { return (simd_short3)__x; } +static simd_short4 SIMD_CFUNC simd_short(simd_ushort4 __x) { return (simd_short4)__x; } +static simd_short8 SIMD_CFUNC simd_short(simd_ushort8 __x) { return (simd_short8)__x; } +static simd_short16 SIMD_CFUNC simd_short(simd_ushort16 __x) { return (simd_short16)__x; } +static simd_short32 SIMD_CFUNC simd_short(simd_ushort32 __x) { return (simd_short32)__x; } +static simd_short2 SIMD_CFUNC simd_short(simd_int2 __x) { return __builtin_convertvector(__x & 0xffff, simd_short2); } +static simd_short3 SIMD_CFUNC simd_short(simd_int3 __x) { return __builtin_convertvector(__x & 0xffff, simd_short3); } +static simd_short4 SIMD_CFUNC simd_short(simd_int4 __x) { return __builtin_convertvector(__x & 0xffff, simd_short4); } +static simd_short8 SIMD_CFUNC simd_short(simd_int8 __x) { return __builtin_convertvector(__x & 0xffff, simd_short8); } +static simd_short16 SIMD_CFUNC simd_short(simd_int16 __x) { return __builtin_convertvector(__x & 0xffff, simd_short16); } +static simd_short2 SIMD_CFUNC simd_short(simd_uint2 __x) { return simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_uint3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_uint4 __x) { return 
simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_uint8 __x) { return simd_short(simd_int(__x)); } +static simd_short16 SIMD_CFUNC simd_short(simd_uint16 __x) { return simd_short(simd_int(__x)); } +static simd_short2 SIMD_CFUNC simd_short(simd_float2 __x) { return simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_float3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_float4 __x) { return simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_float8 __x) { return simd_short(simd_int(__x)); } +static simd_short16 SIMD_CFUNC simd_short(simd_float16 __x) { return simd_short(simd_int(__x)); } +static simd_short2 SIMD_CFUNC simd_short(simd_long2 __x) { return simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_long3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_long4 __x) { return simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_long8 __x) { return simd_short(simd_int(__x)); } +static simd_short2 SIMD_CFUNC simd_short(simd_ulong2 __x) { return simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_ulong3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_ulong4 __x) { return simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_ulong8 __x) { return simd_short(simd_int(__x)); } +static simd_short2 SIMD_CFUNC simd_short(simd_double2 __x) { return simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_double3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_double4 __x) { return simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_double8 __x) { return simd_short(simd_int(__x)); } + +static simd_short2 SIMD_CFUNC simd_short_sat(simd_char2 __x) { return simd_short(__x); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_char3 __x) { return simd_short(__x); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_char4 __x) { return simd_short(__x); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_char8 __x) { return simd_short(__x); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_char16 __x) { return simd_short(__x); } +static simd_short32 SIMD_CFUNC simd_short_sat(simd_char32 __x) { return simd_short(__x); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_short2 __x) { return __x; } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_short3 __x) { return __x; } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_short4 __x) { return __x; } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_short8 __x) { return __x; } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_short16 __x) { return __x; } +static simd_short32 SIMD_CFUNC simd_short_sat(simd_short32 __x) { return __x; } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_int2 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_int3 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_int4 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_int8 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_int16 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_float2 __x) { return 
simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_float3 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_float4 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_float8 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_float16 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_long2 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_long3 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_long4 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_long8 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_double2 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_double3 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_double4 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_double8 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_uchar2 __x) { return simd_short(__x); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_uchar3 __x) { return simd_short(__x); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_uchar4 __x) { return simd_short(__x); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_uchar8 __x) { return simd_short(__x); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_uchar16 __x) { return simd_short(__x); } +static simd_short32 SIMD_CFUNC simd_short_sat(simd_uchar32 __x) { return simd_short(__x); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_ushort2 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_ushort3 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_ushort4 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_ushort8 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_ushort16 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short32 SIMD_CFUNC simd_short_sat(simd_ushort32 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_uint2 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_uint3 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_uint4 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_uint8 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_uint16 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_ulong2 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_ulong3 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_ulong4 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short8 
SIMD_CFUNC simd_short_sat(simd_ulong8 __x) { return simd_short(simd_min(__x,0x7fff)); } + + +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_char2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_char3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_char4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_char8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_char16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_char32 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_uchar2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_uchar3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_uchar4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_uchar8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_uchar16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_uchar32 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_short2 __x) { return (simd_ushort2)__x; } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_short3 __x) { return (simd_ushort3)__x; } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_short4 __x) { return (simd_ushort4)__x; } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_short8 __x) { return (simd_ushort8)__x; } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_short16 __x) { return (simd_ushort16)__x; } +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_short32 __x) { return (simd_ushort32)__x; } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_ushort2 __x) { return __x; } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_ushort3 __x) { return __x; } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_ushort4 __x) { return __x; } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_ushort8 __x) { return __x; } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_ushort16 __x) { return __x; } +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_ushort32 __x) { return __x; } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_int2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_int3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_int4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_int8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_int16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_uint2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_uint3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_uint4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_uint8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_uint16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_float2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_float3 __x) { return simd_ushort(simd_short(__x)); } 
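+/* Annotation, not in the upstream header: the unsaturated simd_ushort
+ * narrows modulo 2^16 (it routes through simd_short, which masks with
+ * 0xffff), e.g.
+ *
+ *   simd_ushort((simd_int2){ 70000, -1 })  ==  (simd_ushort2){ 4464, 0xffff }
+ */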
+static simd_ushort4 SIMD_CFUNC simd_ushort(simd_float4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_float8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_float16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_long2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_long3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_long4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_long8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_ulong2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_ulong3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_ulong4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_ulong8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_double2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_double3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_double4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_double8 __x) { return simd_ushort(simd_short(__x)); } + +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_char2 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_char3 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_char4 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_char8 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_char16 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_char32 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_short2 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_short3 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_short4 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_short8 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_short16 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_short32 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_int2 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_int3 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_int4 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_int8 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_int16 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_float2 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort3 SIMD_CFUNC 
simd_ushort_sat(simd_float3 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_float4 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_float8 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_float16 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_long2 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_long3 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_long4 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_long8 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_double2 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_double3 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_double4 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_double8 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_uchar2 __x) { return simd_ushort(__x); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_uchar3 __x) { return simd_ushort(__x); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_uchar4 __x) { return simd_ushort(__x); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_uchar8 __x) { return simd_ushort(__x); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_uchar16 __x) { return simd_ushort(__x); } +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_uchar32 __x) { return simd_ushort(__x); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_ushort2 __x) { return __x; } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_ushort3 __x) { return __x; } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_ushort4 __x) { return __x; } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_ushort8 __x) { return __x; } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_ushort16 __x) { return __x; } +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_ushort32 __x) { return __x; } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_uint2 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_uint3 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_uint4 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_uint8 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_uint16 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_ulong2 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_ulong3 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_ulong4 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_ulong8 __x) { return simd_ushort(simd_min(__x, 0xffff)); } + + +static simd_int2 SIMD_CFUNC simd_int(simd_char2 __x) { return __builtin_convertvector(__x, simd_int2); } 
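+/* Annotation, not in the upstream header: widening integer conversions like
+ * this one preserve values (sign- or zero-extending as appropriate), and the
+ * float overloads below truncate toward zero; round-to-nearest-even is the
+ * separate simd_int_rte family. For example:
+ *
+ *   simd_int((simd_char2){ -1, 2 })         ==  (simd_int2){ -1, 2 }
+ *   simd_int((simd_float2){ 2.7f, -2.7f })  ==  (simd_int2){ 2, -2 }
+ */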
+static simd_int3 SIMD_CFUNC simd_int(simd_char3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_char4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_char8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_char16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_uchar2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_uchar3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_uchar4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_uchar8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_uchar16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_short2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_short3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_short4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_short8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_short16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_ushort2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_ushort3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_ushort4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_ushort8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_ushort16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_int2 __x) { return __x; } +static simd_int3 SIMD_CFUNC simd_int(simd_int3 __x) { return __x; } +static simd_int4 SIMD_CFUNC simd_int(simd_int4 __x) { return __x; } +static simd_int8 SIMD_CFUNC simd_int(simd_int8 __x) { return __x; } +static simd_int16 SIMD_CFUNC simd_int(simd_int16 __x) { return __x; } +static simd_int2 SIMD_CFUNC simd_int(simd_uint2 __x) { return (simd_int2)__x; } +static simd_int3 SIMD_CFUNC simd_int(simd_uint3 __x) { return (simd_int3)__x; } +static simd_int4 SIMD_CFUNC simd_int(simd_uint4 __x) { return (simd_int4)__x; } +static simd_int8 SIMD_CFUNC simd_int(simd_uint8 __x) { return (simd_int8)__x; } +static simd_int16 SIMD_CFUNC simd_int(simd_uint16 __x) { return (simd_int16)__x; } +static simd_int2 SIMD_CFUNC simd_int(simd_float2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_float3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_float4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_float8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_float16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_long2 __x) { return __builtin_convertvector(__x & 0xffffffff, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_long3 __x) { return __builtin_convertvector(__x & 
0xffffffff, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_long4 __x) { return __builtin_convertvector(__x & 0xffffffff, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_long8 __x) { return __builtin_convertvector(__x & 0xffffffff, simd_int8); } +static simd_int2 SIMD_CFUNC simd_int(simd_ulong2 __x) { return simd_int(simd_long(__x)); } +static simd_int3 SIMD_CFUNC simd_int(simd_ulong3 __x) { return simd_int(simd_long(__x)); } +static simd_int4 SIMD_CFUNC simd_int(simd_ulong4 __x) { return simd_int(simd_long(__x)); } +static simd_int8 SIMD_CFUNC simd_int(simd_ulong8 __x) { return simd_int(simd_long(__x)); } +static simd_int2 SIMD_CFUNC simd_int(simd_double2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_double3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_double4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_double8 __x) { return __builtin_convertvector(__x, simd_int8); } + +static simd_int2 SIMD_CFUNC simd_int_sat(simd_char2 __x) { return simd_int(__x); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_char3 __x) { return simd_int(__x); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_char4 __x) { return simd_int(__x); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_char8 __x) { return simd_int(__x); } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_char16 __x) { return simd_int(__x); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_short2 __x) { return simd_int(__x); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_short3 __x) { return simd_int(__x); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_short4 __x) { return simd_int(__x); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_short8 __x) { return simd_int(__x); } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_short16 __x) { return simd_int(__x); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_int2 __x) { return __x; } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_int3 __x) { return __x; } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_int4 __x) { return __x; } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_int8 __x) { return __x; } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_int16 __x) { return __x; } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_float2 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_float3 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_float4 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_float8 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_float16 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_long2 __x) { return simd_int(simd_clamp(__x,-0x80000000LL,0x7fffffffLL)); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_long3 __x) { return simd_int(simd_clamp(__x,-0x80000000LL,0x7fffffffLL)); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_long4 __x) { return simd_int(simd_clamp(__x,-0x80000000LL,0x7fffffffLL)); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_long8 __x) { return simd_int(simd_clamp(__x,-0x80000000LL,0x7fffffffLL)); } +static simd_int2 
SIMD_CFUNC simd_int_sat(simd_double2 __x) { return simd_int(simd_clamp(__x,-0x1.0p31,0x1.fffffffcp30)); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_double3 __x) { return simd_int(simd_clamp(__x,-0x1.0p31,0x1.fffffffcp30)); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_double4 __x) { return simd_int(simd_clamp(__x,-0x1.0p31,0x1.fffffffcp30)); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_double8 __x) { return simd_int(simd_clamp(__x,-0x1.0p31,0x1.fffffffcp30)); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_uchar2 __x) { return simd_int(__x); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_uchar3 __x) { return simd_int(__x); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_uchar4 __x) { return simd_int(__x); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_uchar8 __x) { return simd_int(__x); } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_uchar16 __x) { return simd_int(__x); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_ushort2 __x) { return simd_int(__x); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_ushort3 __x) { return simd_int(__x); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_ushort4 __x) { return simd_int(__x); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_ushort8 __x) { return simd_int(__x); } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_ushort16 __x) { return simd_int(__x); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_uint2 __x) { return simd_int(simd_min(__x,0x7fffffff)); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_uint3 __x) { return simd_int(simd_min(__x,0x7fffffff)); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_uint4 __x) { return simd_int(simd_min(__x,0x7fffffff)); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_uint8 __x) { return simd_int(simd_min(__x,0x7fffffff)); } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_uint16 __x) { return simd_int(simd_min(__x,0x7fffffff)); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_ulong2 __x) { return simd_int(simd_min(__x,0x7fffffff)); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_ulong3 __x) { return simd_int(simd_min(__x,0x7fffffff)); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_ulong4 __x) { return simd_int(simd_min(__x,0x7fffffff)); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_ulong8 __x) { return simd_int(simd_min(__x,0x7fffffff)); } + +static simd_int2 SIMD_CFUNC simd_int_rte(simd_float2 __x) { +#if defined __arm64__ + return vcvtn_s32_f32(__x); +#else + return simd_make_int2(simd_int_rte(simd_make_float4_undef(__x))); +#endif +} + +static simd_int3 SIMD_CFUNC simd_int_rte(simd_float3 __x) { + return simd_make_int3(simd_int_rte(simd_make_float4_undef(__x))); +} + +static simd_int4 SIMD_CFUNC simd_int_rte(simd_float4 __x) { +#if defined __SSE2__ + return _mm_cvtps_epi32(__x); +#elif defined __arm64__ + return vcvtnq_s32_f32(__x); +#else + simd_float4 magic = __tg_copysign(0x1.0p23, __x); + simd_int4 x_is_small = __tg_fabs(__x) < 0x1.0p23; + return __builtin_convertvector(simd_bitselect(__x, (__x + magic) - magic, x_is_small & 0x7fffffff), simd_int4); +#endif +} + +static simd_int8 SIMD_CFUNC simd_int_rte(simd_float8 __x) { +#if defined __AVX__ + return _mm256_cvtps_epi32(__x); +#else + return simd_make_int8(simd_int_rte(__x.lo), simd_int_rte(__x.hi)); +#endif +} + +static simd_int16 SIMD_CFUNC simd_int_rte(simd_float16 __x) { +#if defined __AVX512F__ + return _mm512_cvt_roundps_epi32(__x, _MM_FROUND_RINT); +#else + return simd_make_int16(simd_int_rte(__x.lo), simd_int_rte(__x.hi)); +#endif +} + +static simd_uint2 SIMD_CFUNC simd_uint(simd_char2 __x) { return 
simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_char3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_char4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_char8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint16 SIMD_CFUNC simd_uint(simd_char16 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_uchar2 __x) { return simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_uchar3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_uchar4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_uchar8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint16 SIMD_CFUNC simd_uint(simd_uchar16 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_short2 __x) { return simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_short3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_short4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_short8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint16 SIMD_CFUNC simd_uint(simd_short16 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_ushort2 __x) { return simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_ushort3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_ushort4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_ushort8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint16 SIMD_CFUNC simd_uint(simd_ushort16 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_int2 __x) { return (simd_uint2)__x; } +static simd_uint3 SIMD_CFUNC simd_uint(simd_int3 __x) { return (simd_uint3)__x; } +static simd_uint4 SIMD_CFUNC simd_uint(simd_int4 __x) { return (simd_uint4)__x; } +static simd_uint8 SIMD_CFUNC simd_uint(simd_int8 __x) { return (simd_uint8)__x; } +static simd_uint16 SIMD_CFUNC simd_uint(simd_int16 __x) { return (simd_uint16)__x; } +static simd_uint2 SIMD_CFUNC simd_uint(simd_uint2 __x) { return __x; } +static simd_uint3 SIMD_CFUNC simd_uint(simd_uint3 __x) { return __x; } +static simd_uint4 SIMD_CFUNC simd_uint(simd_uint4 __x) { return __x; } +static simd_uint8 SIMD_CFUNC simd_uint(simd_uint8 __x) { return __x; } +static simd_uint16 SIMD_CFUNC simd_uint(simd_uint16 __x) { return __x; } +static simd_uint2 SIMD_CFUNC simd_uint(simd_float2 __x) { simd_int2 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float2)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint2)0,0x80000000,__big); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_float3 __x) { simd_int3 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float3)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint3)0,0x80000000,__big); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_float4 __x) { simd_int4 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float4)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint4)0,0x80000000,__big); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_float8 __x) { simd_int8 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float8)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint8)0,0x80000000,__big); } +static simd_uint16 SIMD_CFUNC 
simd_uint(simd_float16 __x) { simd_int16 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float16)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint16)0,0x80000000,__big); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_long2 __x) { return simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_long3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_long4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_long8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_ulong2 __x) { return simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_ulong3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_ulong4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_ulong8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_double2 __x) { simd_long2 __big = __x > 0x1.fffffffcp30; return simd_uint(simd_int(__x - simd_bitselect((simd_double2)0,0x1.0p31,__big))) + simd_bitselect((simd_uint2)0,0x80000000,simd_int(__big)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_double3 __x) { simd_long3 __big = __x > 0x1.fffffffcp30; return simd_uint(simd_int(__x - simd_bitselect((simd_double3)0,0x1.0p31,__big))) + simd_bitselect((simd_uint3)0,0x80000000,simd_int(__big)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_double4 __x) { simd_long4 __big = __x > 0x1.fffffffcp30; return simd_uint(simd_int(__x - simd_bitselect((simd_double4)0,0x1.0p31,__big))) + simd_bitselect((simd_uint4)0,0x80000000,simd_int(__big)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_double8 __x) { simd_long8 __big = __x > 0x1.fffffffcp30; return simd_uint(simd_int(__x - simd_bitselect((simd_double8)0,0x1.0p31,__big))) + simd_bitselect((simd_uint8)0,0x80000000,simd_int(__big)); } + +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_char2 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_char3 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_char4 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_char8 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_char16 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_short2 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_short3 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_short4 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_short8 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_short16 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_int2 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_int3 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_int4 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_int8 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_int16 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_float2 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 
0x1.0p32f); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_float3 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 0x1.0p32f); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_float4 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 0x1.0p32f); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_float8 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 0x1.0p32f); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_float16 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 0x1.0p32f); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_long2 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_long3 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_long4 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_long8 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_double2 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_double3 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_double4 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_double8 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_uchar2 __x) { return simd_uint(__x); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_uchar3 __x) { return simd_uint(__x); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_uchar4 __x) { return simd_uint(__x); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_uchar8 __x) { return simd_uint(__x); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_uchar16 __x) { return simd_uint(__x); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_ushort2 __x) { return simd_uint(__x); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_ushort3 __x) { return simd_uint(__x); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_ushort4 __x) { return simd_uint(__x); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_ushort8 __x) { return simd_uint(__x); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_ushort16 __x) { return simd_uint(__x); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_uint2 __x) { return __x; } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_uint3 __x) { return __x; } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_uint4 __x) { return __x; } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_uint8 __x) { return __x; } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_uint16 __x) { return __x; } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_ulong2 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_ulong3 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_ulong4 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_ulong8 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } + + +static simd_float2 SIMD_CFUNC simd_float(simd_char2 __x) { return (simd_float2)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float3 SIMD_CFUNC simd_float(simd_char3 __x) { return (simd_float3)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float4 SIMD_CFUNC simd_float(simd_char4 __x) { return 
(simd_float4)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float8 SIMD_CFUNC simd_float(simd_char8 __x) { return (simd_float8)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float16 SIMD_CFUNC simd_float(simd_char16 __x) { return (simd_float16)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float2 SIMD_CFUNC simd_float(simd_uchar2 __x) { return (simd_float2)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float3 SIMD_CFUNC simd_float(simd_uchar3 __x) { return (simd_float3)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float4 SIMD_CFUNC simd_float(simd_uchar4 __x) { return (simd_float4)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float8 SIMD_CFUNC simd_float(simd_uchar8 __x) { return (simd_float8)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float16 SIMD_CFUNC simd_float(simd_uchar16 __x) { return (simd_float16)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float2 SIMD_CFUNC simd_float(simd_short2 __x) { return (simd_float2)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float3 SIMD_CFUNC simd_float(simd_short3 __x) { return (simd_float3)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float4 SIMD_CFUNC simd_float(simd_short4 __x) { return (simd_float4)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float8 SIMD_CFUNC simd_float(simd_short8 __x) { return (simd_float8)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float16 SIMD_CFUNC simd_float(simd_short16 __x) { return (simd_float16)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float2 SIMD_CFUNC simd_float(simd_ushort2 __x) { return (simd_float2)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float3 SIMD_CFUNC simd_float(simd_ushort3 __x) { return (simd_float3)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float4 SIMD_CFUNC simd_float(simd_ushort4 __x) { return (simd_float4)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float8 SIMD_CFUNC simd_float(simd_ushort8 __x) { return (simd_float8)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float16 SIMD_CFUNC simd_float(simd_ushort16 __x) { return (simd_float16)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float2 SIMD_CFUNC simd_float(simd_int2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_int3 __x) { return __builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_int4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_int8 __x) { return __builtin_convertvector(__x,simd_float8); } +static simd_float16 SIMD_CFUNC simd_float(simd_int16 __x) { return __builtin_convertvector(__x,simd_float16); } +static simd_float2 SIMD_CFUNC simd_float(simd_uint2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_uint3 __x) { return __builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_uint4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_uint8 __x) { return __builtin_convertvector(__x,simd_float8); } +static simd_float16 SIMD_CFUNC simd_float(simd_uint16 __x) { return __builtin_convertvector(__x,simd_float16); } +static simd_float2 SIMD_CFUNC simd_float(simd_float2 __x) { return __x; } +static simd_float3 SIMD_CFUNC simd_float(simd_float3 __x) { return __x; } +static simd_float4 SIMD_CFUNC simd_float(simd_float4 __x) { return __x; } +static simd_float8 
SIMD_CFUNC simd_float(simd_float8 __x) { return __x; } +static simd_float16 SIMD_CFUNC simd_float(simd_float16 __x) { return __x; } +static simd_float2 SIMD_CFUNC simd_float(simd_long2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_long3 __x) { return __builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_long4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_long8 __x) { return __builtin_convertvector(__x,simd_float8); } +static simd_float2 SIMD_CFUNC simd_float(simd_ulong2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_ulong3 __x) { return __builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_ulong4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_ulong8 __x) { return __builtin_convertvector(__x,simd_float8); } +static simd_float2 SIMD_CFUNC simd_float(simd_double2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_double3 __x) { return __builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_double4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_double8 __x) { return __builtin_convertvector(__x,simd_float8); } + + +static simd_long2 SIMD_CFUNC simd_long(simd_char2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_char3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_char4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_char8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_uchar2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_uchar3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_uchar4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_uchar8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_short2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_short3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_short4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_short8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_ushort2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_ushort3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_ushort4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_ushort8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_int2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_int3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_int4 __x) { return __builtin_convertvector(__x,simd_long4); } +static 
simd_long8 SIMD_CFUNC simd_long(simd_int8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_uint2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_uint3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_uint4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_uint8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_float2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_float3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_float4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_float8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_long2 __x) { return __x; } +static simd_long3 SIMD_CFUNC simd_long(simd_long3 __x) { return __x; } +static simd_long4 SIMD_CFUNC simd_long(simd_long4 __x) { return __x; } +static simd_long8 SIMD_CFUNC simd_long(simd_long8 __x) { return __x; } +static simd_long2 SIMD_CFUNC simd_long(simd_ulong2 __x) { return (simd_long2)__x; } +static simd_long3 SIMD_CFUNC simd_long(simd_ulong3 __x) { return (simd_long3)__x; } +static simd_long4 SIMD_CFUNC simd_long(simd_ulong4 __x) { return (simd_long4)__x; } +static simd_long8 SIMD_CFUNC simd_long(simd_ulong8 __x) { return (simd_long8)__x; } +static simd_long2 SIMD_CFUNC simd_long(simd_double2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_double3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_double4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_double8 __x) { return __builtin_convertvector(__x,simd_long8); } + +static simd_long2 SIMD_CFUNC simd_long_sat(simd_char2 __x) { return simd_long(__x); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_char3 __x) { return simd_long(__x); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_char4 __x) { return simd_long(__x); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_char8 __x) { return simd_long(__x); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_short2 __x) { return simd_long(__x); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_short3 __x) { return simd_long(__x); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_short4 __x) { return simd_long(__x); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_short8 __x) { return simd_long(__x); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_int2 __x) { return simd_long(__x); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_int3 __x) { return simd_long(__x); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_int4 __x) { return simd_long(__x); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_int8 __x) { return simd_long(__x); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_float2 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63f)), 0x7fffffffffffffff, simd_long(__x >= 0x1.0p63f)); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_float3 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63f)), 0x7fffffffffffffff, simd_long(__x >= 0x1.0p63f)); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_float4 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63f)), 
0x7fffffffffffffff, simd_long(__x >= 0x1.0p63f)); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_float8 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63f)), 0x7fffffffffffffff, simd_long(__x >= 0x1.0p63f)); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_long2 __x) { return __x; } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_long3 __x) { return __x; } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_long4 __x) { return __x; } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_long8 __x) { return __x; } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_double2 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63)), 0x7fffffffffffffff, __x >= 0x1.0p63); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_double3 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63)), 0x7fffffffffffffff, __x >= 0x1.0p63); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_double4 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63)), 0x7fffffffffffffff, __x >= 0x1.0p63); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_double8 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63)), 0x7fffffffffffffff, __x >= 0x1.0p63); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_uchar2 __x) { return simd_long(__x); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_uchar3 __x) { return simd_long(__x); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_uchar4 __x) { return simd_long(__x); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_uchar8 __x) { return simd_long(__x); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_ushort2 __x) { return simd_long(__x); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_ushort3 __x) { return simd_long(__x); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_ushort4 __x) { return simd_long(__x); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_ushort8 __x) { return simd_long(__x); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_uint2 __x) { return simd_long(__x); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_uint3 __x) { return simd_long(__x); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_uint4 __x) { return simd_long(__x); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_uint8 __x) { return simd_long(__x); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_ulong2 __x) { return simd_long(simd_min(__x,0x7fffffffffffffff)); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_ulong3 __x) { return simd_long(simd_min(__x,0x7fffffffffffffff)); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_ulong4 __x) { return simd_long(simd_min(__x,0x7fffffffffffffff)); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_ulong8 __x) { return simd_long(simd_min(__x,0x7fffffffffffffff)); } + +static simd_long2 SIMD_CFUNC simd_long_rte(simd_double2 __x) { +#if defined __AVX512F__ + return _mm_cvtpd_epi64(__x); +#elif defined __arm64__ + return vcvtnq_s64_f64(__x); +#else + simd_double2 magic = __tg_copysign(0x1.0p52, __x); + simd_long2 x_is_small = __tg_fabs(__x) < 0x1.0p52; + return __builtin_convertvector(simd_bitselect(__x, (__x + magic) - magic, x_is_small & 0x7fffffffffffffff), simd_long2); +#endif +} + +static simd_long3 SIMD_CFUNC simd_long_rte(simd_double3 __x) { + return simd_make_long3(simd_long_rte(simd_make_double4_undef(__x))); +} + +static simd_long4 SIMD_CFUNC simd_long_rte(simd_double4 __x) { +#if defined __AVX512F__ + return _mm256_cvtpd_epi64(__x); +#else + return simd_make_long4(simd_long_rte(__x.lo), simd_long_rte(__x.hi)); +#endif +} + +static simd_long8 SIMD_CFUNC simd_long_rte(simd_double8 __x) { +#if 
defined __AVX512F__ + return _mm512_cvt_roundpd_epi64(__x, _MM_FROUND_RINT); +#else + return simd_make_long8(simd_long_rte(__x.lo), simd_long_rte(__x.hi)); +#endif +} + + +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_char2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_char3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_char4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_char8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_uchar2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_uchar3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_uchar4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_uchar8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_short2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_short3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_short4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_short8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_ushort2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_ushort3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_ushort4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_ushort8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_int2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_int3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_int4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_int8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_uint2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_uint3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_uint4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_uint8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_float2 __x) { simd_int2 __big = __x >= 0x1.0p63f; return simd_ulong(simd_long(__x - simd_bitselect((simd_float2)0,0x1.0p63f,__big))) + simd_bitselect((simd_ulong2)0,0x8000000000000000,simd_long(__big)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_float3 __x) { simd_int3 __big = __x >= 0x1.0p63f; return simd_ulong(simd_long(__x - simd_bitselect((simd_float3)0,0x1.0p63f,__big))) + simd_bitselect((simd_ulong3)0,0x8000000000000000,simd_long(__big)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_float4 __x) { simd_int4 __big = __x >= 0x1.0p63f; return simd_ulong(simd_long(__x - simd_bitselect((simd_float4)0,0x1.0p63f,__big))) + simd_bitselect((simd_ulong4)0,0x8000000000000000,simd_long(__big)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_float8 __x) { simd_int8 __big = __x >= 0x1.0p63f; return simd_ulong(simd_long(__x - simd_bitselect((simd_float8)0,0x1.0p63f,__big))) + simd_bitselect((simd_ulong8)0,0x8000000000000000,simd_long(__big)); } +static simd_ulong2 SIMD_CFUNC 
simd_ulong(simd_long2 __x) { return (simd_ulong2)__x; } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_long3 __x) { return (simd_ulong3)__x; } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_long4 __x) { return (simd_ulong4)__x; } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_long8 __x) { return (simd_ulong8)__x; } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_ulong2 __x) { return __x; } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_ulong3 __x) { return __x; } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_ulong4 __x) { return __x; } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_ulong8 __x) { return __x; } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_double2 __x) { simd_long2 __big = __x >= 0x1.0p63; return simd_ulong(simd_long(__x - simd_bitselect((simd_double2)0,0x1.0p63,__big))) + simd_bitselect((simd_ulong2)0,0x8000000000000000,__big); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_double3 __x) { simd_long3 __big = __x >= 0x1.0p63; return simd_ulong(simd_long(__x - simd_bitselect((simd_double3)0,0x1.0p63,__big))) + simd_bitselect((simd_ulong3)0,0x8000000000000000,__big); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_double4 __x) { simd_long4 __big = __x >= 0x1.0p63; return simd_ulong(simd_long(__x - simd_bitselect((simd_double4)0,0x1.0p63,__big))) + simd_bitselect((simd_ulong4)0,0x8000000000000000,__big); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_double8 __x) { simd_long8 __big = __x >= 0x1.0p63; return simd_ulong(simd_long(__x - simd_bitselect((simd_double8)0,0x1.0p63,__big))) + simd_bitselect((simd_ulong8)0,0x8000000000000000,__big); } + +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_char2 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_char3 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_char4 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_char8 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_short2 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_short3 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_short4 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_short8 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_int2 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_int3 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_int4 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_int8 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_float2 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.f)), 0xffffffffffffffff, simd_long(__x >= 0x1.0p64f)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_float3 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.f)), 0xffffffffffffffff, simd_long(__x >= 0x1.0p64f)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_float4 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.f)), 0xffffffffffffffff, simd_long(__x >= 0x1.0p64f)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_float8 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.f)), 0xffffffffffffffff, simd_long(__x >= 0x1.0p64f)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_long2 __x) { 
return simd_ulong(simd_max(__x,0)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_long3 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_long4 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_long8 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_double2 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.0)), 0xffffffffffffffff, __x >= 0x1.0p64); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_double3 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.0)), 0xffffffffffffffff, __x >= 0x1.0p64); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_double4 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.0)), 0xffffffffffffffff, __x >= 0x1.0p64); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_double8 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.0)), 0xffffffffffffffff, __x >= 0x1.0p64); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_uchar2 __x) { return simd_ulong(__x); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_uchar3 __x) { return simd_ulong(__x); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_uchar4 __x) { return simd_ulong(__x); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_uchar8 __x) { return simd_ulong(__x); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_ushort2 __x) { return simd_ulong(__x); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_ushort3 __x) { return simd_ulong(__x); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_ushort4 __x) { return simd_ulong(__x); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_ushort8 __x) { return simd_ulong(__x); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_uint2 __x) { return simd_ulong(__x); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_uint3 __x) { return simd_ulong(__x); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_uint4 __x) { return simd_ulong(__x); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_uint8 __x) { return simd_ulong(__x); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_ulong2 __x) { return __x; } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_ulong3 __x) { return __x; } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_ulong4 __x) { return __x; } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_ulong8 __x) { return __x; } + + +static simd_double2 SIMD_CFUNC simd_double(simd_char2 __x) { return simd_double(simd_int(__x)); } +static simd_double3 SIMD_CFUNC simd_double(simd_char3 __x) { return simd_double(simd_int(__x)); } +static simd_double4 SIMD_CFUNC simd_double(simd_char4 __x) { return simd_double(simd_int(__x)); } +static simd_double8 SIMD_CFUNC simd_double(simd_char8 __x) { return simd_double(simd_int(__x)); } +static simd_double2 SIMD_CFUNC simd_double(simd_uchar2 __x) { return simd_double(simd_int(__x)); } +static simd_double3 SIMD_CFUNC simd_double(simd_uchar3 __x) { return simd_double(simd_int(__x)); } +static simd_double4 SIMD_CFUNC simd_double(simd_uchar4 __x) { return simd_double(simd_int(__x)); } +static simd_double8 SIMD_CFUNC simd_double(simd_uchar8 __x) { return simd_double(simd_int(__x)); } +static simd_double2 SIMD_CFUNC simd_double(simd_short2 __x) { return simd_double(simd_int(__x)); } +static simd_double3 SIMD_CFUNC simd_double(simd_short3 __x) { return simd_double(simd_int(__x)); } +static simd_double4 SIMD_CFUNC simd_double(simd_short4 __x) { return simd_double(simd_int(__x)); } +static simd_double8 SIMD_CFUNC simd_double(simd_short8 __x) { 
return simd_double(simd_int(__x)); } +static simd_double2 SIMD_CFUNC simd_double(simd_ushort2 __x) { return simd_double(simd_int(__x)); } +static simd_double3 SIMD_CFUNC simd_double(simd_ushort3 __x) { return simd_double(simd_int(__x)); } +static simd_double4 SIMD_CFUNC simd_double(simd_ushort4 __x) { return simd_double(simd_int(__x)); } +static simd_double8 SIMD_CFUNC simd_double(simd_ushort8 __x) { return simd_double(simd_int(__x)); } +static simd_double2 SIMD_CFUNC simd_double(simd_int2 __x) { return __builtin_convertvector(__x, simd_double2); } +static simd_double3 SIMD_CFUNC simd_double(simd_int3 __x) { return __builtin_convertvector(__x, simd_double3); } +static simd_double4 SIMD_CFUNC simd_double(simd_int4 __x) { return __builtin_convertvector(__x, simd_double4); } +static simd_double8 SIMD_CFUNC simd_double(simd_int8 __x) { return __builtin_convertvector(__x, simd_double8); } +static simd_double2 SIMD_CFUNC simd_double(simd_uint2 __x) { return __builtin_convertvector(__x, simd_double2); } +static simd_double3 SIMD_CFUNC simd_double(simd_uint3 __x) { return __builtin_convertvector(__x, simd_double3); } +static simd_double4 SIMD_CFUNC simd_double(simd_uint4 __x) { return __builtin_convertvector(__x, simd_double4); } +static simd_double8 SIMD_CFUNC simd_double(simd_uint8 __x) { return __builtin_convertvector(__x, simd_double8); } +static simd_double2 SIMD_CFUNC simd_double(simd_float2 __x) { return __builtin_convertvector(__x, simd_double2); } +static simd_double3 SIMD_CFUNC simd_double(simd_float3 __x) { return __builtin_convertvector(__x, simd_double3); } +static simd_double4 SIMD_CFUNC simd_double(simd_float4 __x) { return __builtin_convertvector(__x, simd_double4); } +static simd_double8 SIMD_CFUNC simd_double(simd_float8 __x) { return __builtin_convertvector(__x, simd_double8); } +static simd_double2 SIMD_CFUNC simd_double(simd_long2 __x) { return __builtin_convertvector(__x, simd_double2); } +static simd_double3 SIMD_CFUNC simd_double(simd_long3 __x) { return __builtin_convertvector(__x, simd_double3); } +static simd_double4 SIMD_CFUNC simd_double(simd_long4 __x) { return __builtin_convertvector(__x, simd_double4); } +static simd_double8 SIMD_CFUNC simd_double(simd_long8 __x) { return __builtin_convertvector(__x, simd_double8); } +static simd_double2 SIMD_CFUNC simd_double(simd_ulong2 __x) { return __builtin_convertvector(__x, simd_double2); } +static simd_double3 SIMD_CFUNC simd_double(simd_ulong3 __x) { return __builtin_convertvector(__x, simd_double3); } +static simd_double4 SIMD_CFUNC simd_double(simd_ulong4 __x) { return __builtin_convertvector(__x, simd_double4); } +static simd_double8 SIMD_CFUNC simd_double(simd_ulong8 __x) { return __builtin_convertvector(__x, simd_double8); } +static simd_double2 SIMD_CFUNC simd_double(simd_double2 __x) { return __builtin_convertvector(__x, simd_double2); } +static simd_double3 SIMD_CFUNC simd_double(simd_double3 __x) { return __builtin_convertvector(__x, simd_double3); } +static simd_double4 SIMD_CFUNC simd_double(simd_double4 __x) { return __builtin_convertvector(__x, simd_double4); } +static simd_double8 SIMD_CFUNC simd_double(simd_double8 __x) { return __builtin_convertvector(__x, simd_double8); } + + +#ifdef __cplusplus +} +#endif +#endif // SIMD_COMPILER_HAS_REQUIRED_FEATURES +#endif // __SIMD_CONVERSION_HEADER__
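To make the behavior of the conversions above concrete, here is a minimal usage sketch. It is illustrative only and not part of the diff: it assumes a platform where these headers ship, so that <simd/simd.h> pulls in the conversion and vector-make interfaces, and it contrasts the truncating simd_int, the round-to-nearest-even simd_int_rte, and the saturating simd_int_sat conversions declared above:

    #include <simd/simd.h>
    #include <stdio.h>

    int main(void) {
        simd_float2 f = simd_make_float2(1.5f, 2.5f);

        // simd_int truncates toward zero, like a scalar C cast: {1, 2}.
        simd_int2 t = simd_int(f);

        // simd_int_rte rounds to nearest, ties to even: {2, 2}.
        simd_int2 r = simd_int_rte(f);

        // simd_int_sat clamps values outside [INT32_MIN, INT32_MAX] to the
        // nearest representable int instead of leaving the result
        // undefined: {2147483647, -2147483648}.
        simd_int2 s = simd_int_sat(simd_make_float2(3.0e9f, -3.0e9f));

        printf("%d %d | %d %d | %d %d\n", t.x, t.y, r.x, r.y, s.x, s.y);
        return 0;
    }

Expected output: "1 2 | 2 2 | 2147483647 -2147483648". Note also the pattern used by the small-integer-to-float conversions above: adding 0x4b400000 to the sign-extended integer, then applying a vector cast (which reinterprets bits rather than converting values), yields the float 0x1.8p23 + n with the integer sitting in the low mantissa bits, so subtracting 0x1.8p23f recovers n exactly without a lane-by-lane conversion.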
\ No newline at end of file
diff --git a/lib/libc/include/aarch64-macos-gnu/simd/logic.h b/lib/libc/include/aarch64-macos-gnu/simd/logic.h
new file mode 100644
index 0000000000..fdefcb632d
--- /dev/null
+++ b/lib/libc/include/aarch64-macos-gnu/simd/logic.h
@@ -0,0 +1,1315 @@
+/*! @header
+ *  The interfaces declared in this header provide logical and bitwise
+ *  operations on vectors. Some of these functions operate elementwise,
+ *  and some produce a scalar result that depends on all lanes of the input.
+ *
+ *  For functions returning a boolean value, the return type in C and
+ *  Objective-C is _Bool; for C++ it is bool.
+ *
+ *  Function                    Result
+ *  ------------------------------------------------------------------
+ *  simd_all(comparison)        True if and only if the comparison is true
+ *                              in every vector lane. e.g.:
+ *
+ *                                if (simd_all(x == 0.0f)) {
+ *                                  // executed if every lane of x
+ *                                  // contains zero.
+ *                                }
+ *
+ *                              The precise function of simd_all is to
+ *                              return the high-order bit of the result
+ *                              of a horizontal bitwise AND of all vector
+ *                              lanes.
+ *
+ *  simd_any(comparison)        True if and only if the comparison is true
+ *                              in at least one vector lane. e.g.:
+ *
+ *                                if (simd_any(x < 0.0f)) {
+ *                                  // executed if any lane of x
+ *                                  // contains a negative value.
+ *                                }
+ *
+ *                              The precise function of simd_any is to
+ *                              return the high-order bit of the result
+ *                              of a horizontal bitwise OR of all vector
+ *                              lanes.
+ *
+ *  simd_select(x,y,mask)       For each lane in the result, selects the
+ *                              corresponding element of x if the high-
+ *                              order bit of the corresponding element of
+ *                              mask is 0, and the corresponding element
+ *                              of y otherwise.
+ *
+ *  simd_bitselect(x,y,mask)    For each bit in the result, selects the
+ *                              corresponding bit of x if the corresponding
+ *                              bit of mask is clear, and the corresponding
+ *                              bit of y otherwise.
+ *
+ *  In C++, these functions are available under the simd:: namespace:
+ *
+ *  C++ Function                    Equivalent C Function
+ *  --------------------------------------------------------------------
+ *  simd::all(comparison)           simd_all(comparison)
+ *  simd::any(comparison)           simd_any(comparison)
+ *  simd::select(x,y,mask)          simd_select(x,y,mask)
+ *  simd::bitselect(x,y,mask)       simd_bitselect(x,y,mask)
+ *
+ *  @copyright 2014-2017 Apple, Inc. All rights reserved.
+ *  @unsorted */

+#ifndef SIMD_LOGIC_HEADER
+#define SIMD_LOGIC_HEADER

+#include <simd/base.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+#include <simd/vector_make.h>
+#include <stdint.h>

+#ifdef __cplusplus
+extern "C" {
+#endif

+/*! @abstract True if and only if the high-order bit of any lane of the
+ *  vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char2 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ *  vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char3 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ *  vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char4 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ *  vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char8 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ *  vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char16 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ *  vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char32 x);
+/*!
@abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_char64 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar32 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar64 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short32 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort32 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_int2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_int3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. 
*/ +static inline SIMD_CFUNC simd_bool simd_any(simd_int4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_int8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_int16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_long2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_long3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_long4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_long8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. + * @discussion Deprecated. Use simd_any instead. */ +#define vector_any simd_any + +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char32 x); +/*! 
@abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char64 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar32 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar64 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short32 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort32 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int3 x); +/*! 
@abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_long2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_long3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_long4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_long8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. + * @discussion Deprecated. Use simd_all instead. */ +#define vector_all simd_all + +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_float2 simd_select(simd_float2 x, simd_float2 y, simd_int2 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_float3 simd_select(simd_float3 x, simd_float3 y, simd_int3 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. 
*/ +static inline SIMD_CFUNC simd_float4 simd_select(simd_float4 x, simd_float4 y, simd_int4 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_float8 simd_select(simd_float8 x, simd_float8 y, simd_int8 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_float16 simd_select(simd_float16 x, simd_float16 y, simd_int16 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_double2 simd_select(simd_double2 x, simd_double2 y, simd_long2 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_double3 simd_select(simd_double3 x, simd_double3 y, simd_long3 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_double4 simd_select(simd_double4 x, simd_double4 y, simd_long4 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_double8 simd_select(simd_double8 x, simd_double8 y, simd_long8 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. + * @discussion Deprecated. Use simd_select instead. */ +#define vector_select simd_select + +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char2 simd_bitselect(simd_char2 x, simd_char2 y, simd_char2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char3 simd_bitselect(simd_char3 x, simd_char3 y, simd_char3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char4 simd_bitselect(simd_char4 x, simd_char4 y, simd_char4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char8 simd_bitselect(simd_char8 x, simd_char8 y, simd_char8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char16 simd_bitselect(simd_char16 x, simd_char16 y, simd_char16 mask); +/*! 
@abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char32 simd_bitselect(simd_char32 x, simd_char32 y, simd_char32 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char64 simd_bitselect(simd_char64 x, simd_char64 y, simd_char64 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar2 simd_bitselect(simd_uchar2 x, simd_uchar2 y, simd_char2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar3 simd_bitselect(simd_uchar3 x, simd_uchar3 y, simd_char3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar4 simd_bitselect(simd_uchar4 x, simd_uchar4 y, simd_char4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar8 simd_bitselect(simd_uchar8 x, simd_uchar8 y, simd_char8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar16 simd_bitselect(simd_uchar16 x, simd_uchar16 y, simd_char16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar32 simd_bitselect(simd_uchar32 x, simd_uchar32 y, simd_char32 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar64 simd_bitselect(simd_uchar64 x, simd_uchar64 y, simd_char64 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short2 simd_bitselect(simd_short2 x, simd_short2 y, simd_short2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short3 simd_bitselect(simd_short3 x, simd_short3 y, simd_short3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short4 simd_bitselect(simd_short4 x, simd_short4 y, simd_short4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short8 simd_bitselect(simd_short8 x, simd_short8 y, simd_short8 mask); +/*! 
@abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short16 simd_bitselect(simd_short16 x, simd_short16 y, simd_short16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short32 simd_bitselect(simd_short32 x, simd_short32 y, simd_short32 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort2 simd_bitselect(simd_ushort2 x, simd_ushort2 y, simd_short2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort3 simd_bitselect(simd_ushort3 x, simd_ushort3 y, simd_short3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort4 simd_bitselect(simd_ushort4 x, simd_ushort4 y, simd_short4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort8 simd_bitselect(simd_ushort8 x, simd_ushort8 y, simd_short8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort16 simd_bitselect(simd_ushort16 x, simd_ushort16 y, simd_short16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort32 simd_bitselect(simd_ushort32 x, simd_ushort32 y, simd_short32 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int2 simd_bitselect(simd_int2 x, simd_int2 y, simd_int2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int3 simd_bitselect(simd_int3 x, simd_int3 y, simd_int3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int4 simd_bitselect(simd_int4 x, simd_int4 y, simd_int4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int8 simd_bitselect(simd_int8 x, simd_int8 y, simd_int8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int16 simd_bitselect(simd_int16 x, simd_int16 y, simd_int16 mask); +/*! 
@abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint2 simd_bitselect(simd_uint2 x, simd_uint2 y, simd_int2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint3 simd_bitselect(simd_uint3 x, simd_uint3 y, simd_int3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint4 simd_bitselect(simd_uint4 x, simd_uint4 y, simd_int4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint8 simd_bitselect(simd_uint8 x, simd_uint8 y, simd_int8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint16 simd_bitselect(simd_uint16 x, simd_uint16 y, simd_int16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float2 simd_bitselect(simd_float2 x, simd_float2 y, simd_int2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float3 simd_bitselect(simd_float3 x, simd_float3 y, simd_int3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float4 simd_bitselect(simd_float4 x, simd_float4 y, simd_int4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float8 simd_bitselect(simd_float8 x, simd_float8 y, simd_int8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float16 simd_bitselect(simd_float16 x, simd_float16 y, simd_int16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_long2 simd_bitselect(simd_long2 x, simd_long2 y, simd_long2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_long3 simd_bitselect(simd_long3 x, simd_long3 y, simd_long3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_long4 simd_bitselect(simd_long4 x, simd_long4 y, simd_long4 mask); +/*! 
@abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_long8 simd_bitselect(simd_long8 x, simd_long8 y, simd_long8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ulong2 simd_bitselect(simd_ulong2 x, simd_ulong2 y, simd_long2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ulong3 simd_bitselect(simd_ulong3 x, simd_ulong3 y, simd_long3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ulong4 simd_bitselect(simd_ulong4 x, simd_ulong4 y, simd_long4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ulong8 simd_bitselect(simd_ulong8 x, simd_ulong8 y, simd_long8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_double2 simd_bitselect(simd_double2 x, simd_double2 y, simd_long2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_double3 simd_bitselect(simd_double3 x, simd_double3 y, simd_long3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_double4 simd_bitselect(simd_double4 x, simd_double4 y, simd_long4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_double8 simd_bitselect(simd_double8 x, simd_double8 y, simd_long8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. + * @discussion Deprecated. Use simd_bitselect instead. */ +#define vector_bitselect simd_bitselect + +#ifdef __cplusplus +} /* extern "C" */ + +namespace simd { + /*! @abstract True if and only if the high-order bit of every lane is set. */ + template <typename inttypeN> static SIMD_CPPFUNC simd_bool all(const inttypeN predicate) { return ::simd_all(predicate); } + /*! @abstract True if and only if the high-order bit of any lane is set. */ + template <typename inttypeN> static SIMD_CPPFUNC simd_bool any(const inttypeN predicate) { return ::simd_any(predicate); } + /*! @abstract Each lane of the result is selected from the corresponding lane + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. 
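+   * For instance (an illustrative sketch): since a lanewise comparison
+   * yields -1 (all bits set) where it holds and 0 elsewhere,
+   * simd::select(x, -x, x < 0) computes a lanewise absolute value of a
+   * floating-point vector x.
+   *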
*/ + template <typename inttypeN, typename fptypeN> static SIMD_CPPFUNC fptypeN select(const fptypeN x, const fptypeN y, const inttypeN predicate) { return ::simd_select(x,y,predicate); } + /*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ + template <typename inttypeN, typename typeN> static SIMD_CPPFUNC typeN bitselect(const typeN x, const typeN y, const inttypeN mask) { return ::simd_bitselect(x,y,mask); } +} + +extern "C" { +#endif /* __cplusplus */ + +#pragma mark - Implementations + +static inline SIMD_CFUNC simd_bool simd_any(simd_char2 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0x3); +#elif defined __arm64__ + return simd_any(x.xyxy); +#else + union { uint16_t i; simd_char2 v; } u = { .v = x }; + return (u.i & 0x8080); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_char3 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0x7); +#elif defined __arm64__ + return simd_any(x.xyzz); +#else + union { uint32_t i; simd_char3 v; } u = { .v = x }; + return (u.i & 0x808080); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_char4 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0xf); +#elif defined __arm64__ + return simd_any(x.xyzwxyzw); +#else + union { uint32_t i; simd_char4 v; } u = { .v = x }; + return (u.i & 0x80808080); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_char8 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0xff); +#elif defined __arm64__ + return vmaxv_u8(x) & 0x80; +#else + union { uint64_t i; simd_char8 v; } u = { .v = x }; + return (u.i & 0x8080808080808080); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_char16 x) { +#if defined __SSE2__ + return _mm_movemask_epi8((__m128i)x); +#elif defined __arm64__ + return vmaxvq_u8(x) & 0x80; +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_char32 x) { +#if defined __AVX2__ + return _mm256_movemask_epi8(x); +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_char64 x) { + return simd_any(x.lo | x.hi); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar2 x) { + return simd_any((simd_char2)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar3 x) { + return simd_any((simd_char3)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar4 x) { + return simd_any((simd_char4)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar8 x) { + return simd_any((simd_char8)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar16 x) { + return simd_any((simd_char16)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar32 x) { + return simd_any((simd_char32)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar64 x) { + return simd_any((simd_char64)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_short2 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0xa); +#elif defined __arm64__ + return simd_any(x.xyxy); +#else + union { uint32_t i; simd_short2 v; } u = { .v = x }; + return (u.i & 0x80008000); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_short3 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0x2a); +#elif defined __arm64__ + return 
simd_any(x.xyzz); +#else + union { uint64_t i; simd_short3 v; } u = { .v = x }; + return (u.i & 0x800080008000); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_short4 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0xaa); +#elif defined __arm64__ + return vmaxv_u16(x) & 0x8000; +#else + union { uint64_t i; simd_short4 v; } u = { .v = x }; + return (u.i & 0x8000800080008000); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_short8 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)x) & 0xaaaa); +#elif defined __arm64__ + return vmaxvq_u16(x) & 0x8000; +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_short16 x) { +#if defined __AVX2__ + return (_mm256_movemask_epi8(x) & 0xaaaaaaaa); +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_short32 x) { + return simd_any(x.lo | x.hi); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort2 x) { + return simd_any((simd_short2)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort3 x) { + return simd_any((simd_short3)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort4 x) { + return simd_any((simd_short4)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort8 x) { + return simd_any((simd_short8)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort16 x) { + return simd_any((simd_short16)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort32 x) { + return simd_any((simd_short32)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int2 x) { +#if defined __SSE2__ + return (_mm_movemask_ps((__m128)simd_make_int4_undef(x)) & 0x3); +#elif defined __arm64__ + return vmaxv_u32(x) & 0x80000000; +#else + union { uint64_t i; simd_int2 v; } u = { .v = x }; + return (u.i & 0x8000000080000000); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int3 x) { +#if defined __SSE2__ + return (_mm_movemask_ps((__m128)simd_make_int4_undef(x)) & 0x7); +#elif defined __arm64__ + return simd_any(x.xyzz); +#else + return (x.x | x.y | x.z) & 0x80000000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int4 x) { +#if defined __SSE2__ + return _mm_movemask_ps((__m128)x); +#elif defined __arm64__ + return vmaxvq_u32(x) & 0x80000000; +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int8 x) { +#if defined __AVX__ + return _mm256_movemask_ps(x); +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int16 x) { + return simd_any(x.lo | x.hi); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint2 x) { + return simd_any((simd_int2)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint3 x) { + return simd_any((simd_int3)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint4 x) { + return simd_any((simd_int4)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint8 x) { + return simd_any((simd_int8)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint16 x) { + return simd_any((simd_int16)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_long2 x) { +#if defined __SSE2__ + return _mm_movemask_pd((__m128d)x); +#elif defined __arm64__ + return (x.x | x.y) & 0x8000000000000000U; +#else + return (x.x | x.y) & 0x8000000000000000U; +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_long3 x) { +#if defined __AVX__ + return (_mm256_movemask_pd(simd_make_long4_undef(x)) & 0x7); +#else + return 
(x.x | x.y | x.z) & 0x8000000000000000U; +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_long4 x) { +#if defined __AVX__ + return _mm256_movemask_pd(x); +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_long8 x) { + return simd_any(x.lo | x.hi); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong2 x) { + return simd_any((simd_long2)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong3 x) { + return simd_any((simd_long3)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong4 x) { + return simd_any((simd_long4)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong8 x) { + return simd_any((simd_long8)x); +} + +static inline SIMD_CFUNC simd_bool simd_all(simd_char2 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0x3) == 0x3; +#elif defined __arm64__ + return simd_all(x.xyxy); +#else + union { uint16_t i; simd_char2 v; } u = { .v = x }; + return (u.i & 0x8080) == 0x8080; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char3 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0x7) == 0x7; +#elif defined __arm64__ + return simd_all(x.xyzz); +#else + union { uint32_t i; simd_char3 v; } u = { .v = x }; + return (u.i & 0x808080) == 0x808080; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char4 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0xf) == 0xf; +#elif defined __arm64__ + return simd_all(x.xyzwxyzw); +#else + union { uint32_t i; simd_char4 v; } u = { .v = x }; + return (u.i & 0x80808080) == 0x80808080; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char8 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0xff) == 0xff; +#elif defined __arm64__ + return vminv_u8(x) & 0x80; +#else + union { uint64_t i; simd_char8 v; } u = { .v = x }; + return (u.i & 0x8080808080808080) == 0x8080808080808080; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char16 x) { +#if defined __SSE2__ + return _mm_movemask_epi8((__m128i)x) == 0xffff; +#elif defined __arm64__ + return vminvq_u8(x) & 0x80; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char32 x) { +#if defined __AVX2__ + return _mm256_movemask_epi8(x) == 0xffffffff; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char64 x) { + return simd_all(x.lo & x.hi); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar2 x) { + return simd_all((simd_char2)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar3 x) { + return simd_all((simd_char3)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar4 x) { + return simd_all((simd_char4)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar8 x) { + return simd_all((simd_char8)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar16 x) { + return simd_all((simd_char16)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar32 x) { + return simd_all((simd_char32)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar64 x) { + return simd_all((simd_char64)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short2 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0xa) == 0xa; +#elif defined __arm64__ + return simd_all(x.xyxy); +#else + union { uint32_t i; simd_short2 v; } u = { .v = x }; + return (u.i & 
0x80008000) == 0x80008000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short3 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0x2a) == 0x2a; +#elif defined __arm64__ + return simd_all(x.xyzz); +#else + union { uint64_t i; simd_short3 v; } u = { .v = x }; + return (u.i & 0x800080008000) == 0x800080008000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short4 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0xaa) == 0xaa; +#elif defined __arm64__ + return vminv_u16(x) & 0x8000; +#else + union { uint64_t i; simd_short4 v; } u = { .v = x }; + return (u.i & 0x8000800080008000) == 0x8000800080008000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short8 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)x) & 0xaaaa) == 0xaaaa; +#elif defined __arm64__ + return vminvq_u16(x) & 0x8000; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short16 x) { +#if defined __AVX2__ + return (_mm256_movemask_epi8(x) & 0xaaaaaaaa) == 0xaaaaaaaa; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short32 x) { + return simd_all(x.lo & x.hi); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort2 x) { + return simd_all((simd_short2)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort3 x) { + return simd_all((simd_short3)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort4 x) { + return simd_all((simd_short4)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort8 x) { + return simd_all((simd_short8)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort16 x) { + return simd_all((simd_short16)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort32 x) { + return simd_all((simd_short32)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int2 x) { +#if defined __SSE2__ + return (_mm_movemask_ps((__m128)simd_make_int4_undef(x)) & 0x3) == 0x3; +#elif defined __arm64__ + return vminv_u32(x) & 0x80000000; +#else + union { uint64_t i; simd_int2 v; } u = { .v = x }; + return (u.i & 0x8000000080000000) == 0x8000000080000000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int3 x) { +#if defined __SSE2__ + return (_mm_movemask_ps((__m128)simd_make_int4_undef(x)) & 0x7) == 0x7; +#elif defined __arm64__ + return simd_all(x.xyzz); +#else + return (x.x & x.y & x.z) & 0x80000000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int4 x) { +#if defined __SSE2__ + return _mm_movemask_ps((__m128)x) == 0xf; +#elif defined __arm64__ + return vminvq_u32(x) & 0x80000000; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int8 x) { +#if defined __AVX__ + return _mm256_movemask_ps(x) == 0xff; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int16 x) { + return simd_all(x.lo & x.hi); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint2 x) { + return simd_all((simd_int2)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint3 x) { + return simd_all((simd_int3)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint4 x) { + return simd_all((simd_int4)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint8 x) { + return simd_all((simd_int8)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint16 x) { + return simd_all((simd_int16)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_long2 
x) { +#if defined __SSE2__ + return _mm_movemask_pd((__m128d)x) == 0x3; +#elif defined __arm64__ + return (x.x & x.y) & 0x8000000000000000U; +#else + return (x.x & x.y) & 0x8000000000000000U; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_long3 x) { +#if defined __AVX__ + return (_mm256_movemask_pd(simd_make_long4_undef(x)) & 0x7) == 0x7; +#else + return (x.x & x.y & x.z) & 0x8000000000000000U; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_long4 x) { +#if defined __AVX__ + return _mm256_movemask_pd(x) == 0xf; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_long8 x) { + return simd_all(x.lo & x.hi); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong2 x) { + return simd_all((simd_long2)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong3 x) { + return simd_all((simd_long3)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong4 x) { + return simd_all((simd_long4)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong8 x) { + return simd_all((simd_long8)x); +} + +static inline SIMD_CFUNC simd_float2 simd_select(simd_float2 x, simd_float2 y, simd_int2 mask) { + return simd_make_float2(simd_select(simd_make_float4_undef(x), simd_make_float4_undef(y), simd_make_int4_undef(mask))); +} +static inline SIMD_CFUNC simd_float3 simd_select(simd_float3 x, simd_float3 y, simd_int3 mask) { + return simd_make_float3(simd_select(simd_make_float4_undef(x), simd_make_float4_undef(y), simd_make_int4_undef(mask))); +} +static inline SIMD_CFUNC simd_float4 simd_select(simd_float4 x, simd_float4 y, simd_int4 mask) { +#if defined __SSE4_1__ + return _mm_blendv_ps(x, y, (__m128)mask); +#else + return simd_bitselect(x, y, mask >> 31); +#endif +} +static inline SIMD_CFUNC simd_float8 simd_select(simd_float8 x, simd_float8 y, simd_int8 mask) { +#if defined __AVX__ + return _mm256_blendv_ps(x, y, mask); +#else + return simd_bitselect(x, y, mask >> 31); +#endif +} +static inline SIMD_CFUNC simd_float16 simd_select(simd_float16 x, simd_float16 y, simd_int16 mask) { + return simd_bitselect(x, y, mask >> 31); +} +static inline SIMD_CFUNC simd_double2 simd_select(simd_double2 x, simd_double2 y, simd_long2 mask) { +#if defined __SSE4_1__ + return _mm_blendv_pd(x, y, (__m128d)mask); +#else + return simd_bitselect(x, y, mask >> 63); +#endif +} +static inline SIMD_CFUNC simd_double3 simd_select(simd_double3 x, simd_double3 y, simd_long3 mask) { + return simd_make_double3(simd_select(simd_make_double4_undef(x), simd_make_double4_undef(y), simd_make_long4_undef(mask))); +} +static inline SIMD_CFUNC simd_double4 simd_select(simd_double4 x, simd_double4 y, simd_long4 mask) { +#if defined __AVX__ + return _mm256_blendv_pd(x, y, mask); +#else + return simd_bitselect(x, y, mask >> 63); +#endif +} +static inline SIMD_CFUNC simd_double8 simd_select(simd_double8 x, simd_double8 y, simd_long8 mask) { + return simd_bitselect(x, y, mask >> 63); +} + +static inline SIMD_CFUNC simd_char2 simd_bitselect(simd_char2 x, simd_char2 y, simd_char2 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char3 simd_bitselect(simd_char3 x, simd_char3 y, simd_char3 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char4 simd_bitselect(simd_char4 x, simd_char4 y, simd_char4 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char8 simd_bitselect(simd_char8 x, simd_char8 y, simd_char8 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char16 
simd_bitselect(simd_char16 x, simd_char16 y, simd_char16 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char32 simd_bitselect(simd_char32 x, simd_char32 y, simd_char32 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char64 simd_bitselect(simd_char64 x, simd_char64 y, simd_char64 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_uchar2 simd_bitselect(simd_uchar2 x, simd_uchar2 y, simd_char2 mask) { + return (simd_uchar2)simd_bitselect((simd_char2)x, (simd_char2)y, mask); +} +static inline SIMD_CFUNC simd_uchar3 simd_bitselect(simd_uchar3 x, simd_uchar3 y, simd_char3 mask) { + return (simd_uchar3)simd_bitselect((simd_char3)x, (simd_char3)y, mask); +} +static inline SIMD_CFUNC simd_uchar4 simd_bitselect(simd_uchar4 x, simd_uchar4 y, simd_char4 mask) { + return (simd_uchar4)simd_bitselect((simd_char4)x, (simd_char4)y, mask); +} +static inline SIMD_CFUNC simd_uchar8 simd_bitselect(simd_uchar8 x, simd_uchar8 y, simd_char8 mask) { + return (simd_uchar8)simd_bitselect((simd_char8)x, (simd_char8)y, mask); +} +static inline SIMD_CFUNC simd_uchar16 simd_bitselect(simd_uchar16 x, simd_uchar16 y, simd_char16 mask) { + return (simd_uchar16)simd_bitselect((simd_char16)x, (simd_char16)y, mask); +} +static inline SIMD_CFUNC simd_uchar32 simd_bitselect(simd_uchar32 x, simd_uchar32 y, simd_char32 mask) { + return (simd_uchar32)simd_bitselect((simd_char32)x, (simd_char32)y, mask); +} +static inline SIMD_CFUNC simd_uchar64 simd_bitselect(simd_uchar64 x, simd_uchar64 y, simd_char64 mask) { + return (simd_uchar64)simd_bitselect((simd_char64)x, (simd_char64)y, mask); +} +static inline SIMD_CFUNC simd_short2 simd_bitselect(simd_short2 x, simd_short2 y, simd_short2 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short3 simd_bitselect(simd_short3 x, simd_short3 y, simd_short3 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short4 simd_bitselect(simd_short4 x, simd_short4 y, simd_short4 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short8 simd_bitselect(simd_short8 x, simd_short8 y, simd_short8 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short16 simd_bitselect(simd_short16 x, simd_short16 y, simd_short16 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short32 simd_bitselect(simd_short32 x, simd_short32 y, simd_short32 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_ushort2 simd_bitselect(simd_ushort2 x, simd_ushort2 y, simd_short2 mask) { + return (simd_ushort2)simd_bitselect((simd_short2)x, (simd_short2)y, mask); +} +static inline SIMD_CFUNC simd_ushort3 simd_bitselect(simd_ushort3 x, simd_ushort3 y, simd_short3 mask) { + return (simd_ushort3)simd_bitselect((simd_short3)x, (simd_short3)y, mask); +} +static inline SIMD_CFUNC simd_ushort4 simd_bitselect(simd_ushort4 x, simd_ushort4 y, simd_short4 mask) { + return (simd_ushort4)simd_bitselect((simd_short4)x, (simd_short4)y, mask); +} +static inline SIMD_CFUNC simd_ushort8 simd_bitselect(simd_ushort8 x, simd_ushort8 y, simd_short8 mask) { + return (simd_ushort8)simd_bitselect((simd_short8)x, (simd_short8)y, mask); +} +static inline SIMD_CFUNC simd_ushort16 simd_bitselect(simd_ushort16 x, simd_ushort16 y, simd_short16 mask) { + return (simd_ushort16)simd_bitselect((simd_short16)x, (simd_short16)y, mask); +} +static inline SIMD_CFUNC simd_ushort32 simd_bitselect(simd_ushort32 x, simd_ushort32 y, simd_short32 
mask) { + return (simd_ushort32)simd_bitselect((simd_short32)x, (simd_short32)y, mask); +} +static inline SIMD_CFUNC simd_int2 simd_bitselect(simd_int2 x, simd_int2 y, simd_int2 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_int3 simd_bitselect(simd_int3 x, simd_int3 y, simd_int3 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_int4 simd_bitselect(simd_int4 x, simd_int4 y, simd_int4 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_int8 simd_bitselect(simd_int8 x, simd_int8 y, simd_int8 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_int16 simd_bitselect(simd_int16 x, simd_int16 y, simd_int16 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_uint2 simd_bitselect(simd_uint2 x, simd_uint2 y, simd_int2 mask) { + return (simd_uint2)simd_bitselect((simd_int2)x, (simd_int2)y, mask); +} +static inline SIMD_CFUNC simd_uint3 simd_bitselect(simd_uint3 x, simd_uint3 y, simd_int3 mask) { + return (simd_uint3)simd_bitselect((simd_int3)x, (simd_int3)y, mask); +} +static inline SIMD_CFUNC simd_uint4 simd_bitselect(simd_uint4 x, simd_uint4 y, simd_int4 mask) { + return (simd_uint4)simd_bitselect((simd_int4)x, (simd_int4)y, mask); +} +static inline SIMD_CFUNC simd_uint8 simd_bitselect(simd_uint8 x, simd_uint8 y, simd_int8 mask) { + return (simd_uint8)simd_bitselect((simd_int8)x, (simd_int8)y, mask); +} +static inline SIMD_CFUNC simd_uint16 simd_bitselect(simd_uint16 x, simd_uint16 y, simd_int16 mask) { + return (simd_uint16)simd_bitselect((simd_int16)x, (simd_int16)y, mask); +} +static inline SIMD_CFUNC simd_float2 simd_bitselect(simd_float2 x, simd_float2 y, simd_int2 mask) { + return (simd_float2)simd_bitselect((simd_int2)x, (simd_int2)y, mask); +} +static inline SIMD_CFUNC simd_float3 simd_bitselect(simd_float3 x, simd_float3 y, simd_int3 mask) { + return (simd_float3)simd_bitselect((simd_int3)x, (simd_int3)y, mask); +} +static inline SIMD_CFUNC simd_float4 simd_bitselect(simd_float4 x, simd_float4 y, simd_int4 mask) { + return (simd_float4)simd_bitselect((simd_int4)x, (simd_int4)y, mask); +} +static inline SIMD_CFUNC simd_float8 simd_bitselect(simd_float8 x, simd_float8 y, simd_int8 mask) { + return (simd_float8)simd_bitselect((simd_int8)x, (simd_int8)y, mask); +} +static inline SIMD_CFUNC simd_float16 simd_bitselect(simd_float16 x, simd_float16 y, simd_int16 mask) { + return (simd_float16)simd_bitselect((simd_int16)x, (simd_int16)y, mask); +} +static inline SIMD_CFUNC simd_long2 simd_bitselect(simd_long2 x, simd_long2 y, simd_long2 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_long3 simd_bitselect(simd_long3 x, simd_long3 y, simd_long3 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_long4 simd_bitselect(simd_long4 x, simd_long4 y, simd_long4 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_long8 simd_bitselect(simd_long8 x, simd_long8 y, simd_long8 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_ulong2 simd_bitselect(simd_ulong2 x, simd_ulong2 y, simd_long2 mask) { + return (simd_ulong2)simd_bitselect((simd_long2)x, (simd_long2)y, mask); +} +static inline SIMD_CFUNC simd_ulong3 simd_bitselect(simd_ulong3 x, simd_ulong3 y, simd_long3 mask) { + return (simd_ulong3)simd_bitselect((simd_long3)x, (simd_long3)y, mask); +} +static inline SIMD_CFUNC simd_ulong4 simd_bitselect(simd_ulong4 x, simd_ulong4 y, simd_long4 mask) { + return 
(simd_ulong4)simd_bitselect((simd_long4)x, (simd_long4)y, mask);
+}
+static inline SIMD_CFUNC simd_ulong8 simd_bitselect(simd_ulong8 x, simd_ulong8 y, simd_long8 mask) {
+  return (simd_ulong8)simd_bitselect((simd_long8)x, (simd_long8)y, mask);
+}
+static inline SIMD_CFUNC simd_double2 simd_bitselect(simd_double2 x, simd_double2 y, simd_long2 mask) {
+  return (simd_double2)simd_bitselect((simd_long2)x, (simd_long2)y, mask);
+}
+static inline SIMD_CFUNC simd_double3 simd_bitselect(simd_double3 x, simd_double3 y, simd_long3 mask) {
+  return (simd_double3)simd_bitselect((simd_long3)x, (simd_long3)y, mask);
+}
+static inline SIMD_CFUNC simd_double4 simd_bitselect(simd_double4 x, simd_double4 y, simd_long4 mask) {
+  return (simd_double4)simd_bitselect((simd_long4)x, (simd_long4)y, mask);
+}
+static inline SIMD_CFUNC simd_double8 simd_bitselect(simd_double8 x, simd_double8 y, simd_long8 mask) {
+  return (simd_double8)simd_bitselect((simd_long8)x, (simd_long8)y, mask);
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
+#endif /* __SIMD_LOGIC_HEADER__ */
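Taken together, the logic.h operations compose cleanly: lanewise comparisons produce 0/-1 masks, which feed simd_any/simd_all for early-out tests and simd_bitselect for branch-free lane updates. A minimal sketch of that pattern (assuming an Apple toolchain where the umbrella header <simd/simd.h> is available; the variable names are illustrative only):

    #include <simd/simd.h>
    #include <stdio.h>

    int main(void) {
      simd_int4 x   = { 1, -2, 3, -4 };
      simd_int4 neg = x < 0;               // -1 in negative lanes, 0 elsewhere
      if (simd_any(neg))                   // true iff some lane is negative
        x = simd_bitselect(x, -x, neg);    // branch-free: negate only those lanes
      printf("%d %d %d %d\n", x.x, x.y, x.z, x.w);  // prints: 1 2 3 4
      return 0;
    }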
\ No newline at end of file
diff --git a/lib/libc/include/aarch64-macos-gnu/simd/math.h b/lib/libc/include/aarch64-macos-gnu/simd/math.h
new file mode 100644
index 0000000000..4d5c654f69
--- /dev/null
+++ b/lib/libc/include/aarch64-macos-gnu/simd/math.h
@@ -0,0 +1,5380 @@
+/*! @header
+ * The interfaces declared in this header provide elementwise math operations
+ * on vectors; each lane of the result vector depends only on the data in the
+ * corresponding lane of the argument(s) to the function.
+ *
+ * You should not use the C functions declared in this header directly (these
+ * are functions with names like `__tg_cos(x)`). These are merely
+ * implementation details of <tgmath.h> overloading; instead of calling
+ * `__tg_cos(x)`, call `cos(x)`. If you are writing C++, use `simd::cos(x)`.
+ *
+ * Note that while these vector functions are relatively recent additions,
+ * scalar fallback is provided for all of them, so they are available even
+ * when targeting older OS versions.
+ *
+ * The following functions are available:
+ *
+ *      C name            C++ name              Notes
+ *   ----------------------------------------------------------------------
+ *      acos(x)           simd::acos(x)
+ *      asin(x)           simd::asin(x)
+ *      atan(x)           simd::atan(x)
+ *      atan2(y,x)        simd::atan2(y,x)      The argument order matches the scalar
+ *                                              atan2 function, which gives the angle
+ *                                              of a line with slope y/x.
+ *      cos(x)            simd::cos(x)
+ *      sin(x)            simd::sin(x)
+ *      tan(x)            simd::tan(x)
+ *
+ *      cospi(x)          simd::cospi(x)        Returns cos(pi*x), sin(pi*x), tan(pi*x)
+ *      sinpi(x)          simd::sinpi(x)        more efficiently and accurately than
+ *      tanpi(x)          simd::tanpi(x)        would otherwise be possible
+ *
+ *      acosh(x)          simd::acosh(x)
+ *      asinh(x)          simd::asinh(x)
+ *      atanh(x)          simd::atanh(x)
+ *
+ *      cosh(x)           simd::cosh(x)
+ *      sinh(x)           simd::sinh(x)
+ *      tanh(x)           simd::tanh(x)
+ *
+ *      exp(x)            simd::exp(x)
+ *      exp2(x)           simd::exp2(x)
+ *      exp10(x)          simd::exp10(x)        More efficient than pow(10,x).
+ *      expm1(x)          simd::expm1(x)        exp(x)-1, accurate even for tiny x.
+ *
+ *      log(x)            simd::log(x)
+ *      log2(x)           simd::log2(x)
+ *      log10(x)          simd::log10(x)
+ *      log1p(x)          simd::log1p(x)        log(1+x), accurate even for tiny x.
+ *
+ *      fabs(x)           simd::fabs(x)
+ *      cbrt(x)           simd::cbrt(x)
+ *      sqrt(x)           simd::sqrt(x)
+ *      pow(x,y)          simd::pow(x,y)
+ *      copysign(x,y)     simd::copysign(x,y)
+ *      hypot(x,y)        simd::hypot(x,y)      sqrt(x*x + y*y), computed without
+ *                                              overflow.
+ *      erf(x)            simd::erf(x)
+ *      erfc(x)           simd::erfc(x)
+ *      tgamma(x)         simd::tgamma(x)
+ *
+ *      fmod(x,y)         simd::fmod(x,y)
+ *      remainder(x,y)    simd::remainder(x,y)
+ *
+ *      ceil(x)           simd::ceil(x)
+ *      floor(x)          simd::floor(x)
+ *      rint(x)           simd::rint(x)
+ *      round(x)          simd::round(x)
+ *      trunc(x)          simd::trunc(x)
+ *
+ *      fdim(x,y)         simd::fdim(x,y)
+ *      fmax(x,y)         simd::fmax(x,y)       When one argument to fmin or fmax is
+ *      fmin(x,y)         simd::fmin(x,y)       constant, use it as the *second* (y)
+ *                                              argument to get better codegen on some
+ *                                              architectures. E.g., write fmin(x,2)
+ *                                              instead of fmin(2,x).
+ *      fma(x,y,z)        simd::fma(x,y,z)      Fast on arm64 and when targeting AVX2
+ *                                              and later; may be quite expensive on
+ *                                              older hardware.
+ *      simd_muladd(x,y,z) simd::muladd(x,y,z)
+ *
+ * @copyright 2014-2017 Apple, Inc. All rights reserved.
+ * @unsorted */
+
+#ifndef SIMD_MATH_HEADER
+#define SIMD_MATH_HEADER
+
+#include <simd/base.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+#include <simd/vector_make.h>
+#include <simd/logic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*! @abstract Do not call this function; instead use `acos` in C and
+ * Objective-C, and `simd::acos` in C++.
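+ * For example (illustrative; assumes a translation unit that includes
+ * <simd/simd.h>, which makes the overloaded name available):
+ *
+ *     simd_float4 x = { -1.0f, 0.0f, 0.5f, 1.0f };
+ *     simd_float4 y = acos(x);   // elementwise arc cosine; never spell it __tg_acos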
*/ +static inline SIMD_CFUNC simd_float2 __tg_acos(simd_float2 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_acos(simd_float3 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_acos(simd_float4 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_acos(simd_float8 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_acos(simd_float16 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_acos(simd_double2 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_acos(simd_double3 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_acos(simd_double4 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_acos(simd_double8 x); + +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_asin(simd_float2 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_asin(simd_float3 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_asin(simd_float4 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_asin(simd_float8 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_asin(simd_float16 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_asin(simd_double2 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_asin(simd_double3 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_asin(simd_double4 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_asin(simd_double8 x); + +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_atan(simd_float2 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_atan(simd_float3 x); +/*! 
@abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_atan(simd_float4 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_atan(simd_float8 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_atan(simd_float16 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_atan(simd_double2 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_atan(simd_double3 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_atan(simd_double4 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_atan(simd_double8 x); + +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_cos(simd_float2 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_cos(simd_float3 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_cos(simd_float4 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_cos(simd_float8 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_cos(simd_float16 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_cos(simd_double2 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_cos(simd_double3 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_cos(simd_double4 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_cos(simd_double8 x); + +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_sin(simd_float2 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_sin(simd_float3 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_sin(simd_float4 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. 
*/ +static inline SIMD_CFUNC simd_float8 __tg_sin(simd_float8 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_sin(simd_float16 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_sin(simd_double2 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_sin(simd_double3 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_sin(simd_double4 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_sin(simd_double8 x); + +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_tan(simd_float2 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_tan(simd_float3 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_tan(simd_float4 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_tan(simd_float8 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_tan(simd_float16 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_tan(simd_double2 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_tan(simd_double3 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_tan(simd_double4 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_tan(simd_double8 x); + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_cospi(simd_float2 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_cospi(simd_float3 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_cospi(simd_float4 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_cospi(simd_float8 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_cospi(simd_float16 x); +/*! 
@abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_cospi(simd_double2 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_cospi(simd_double3 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_cospi(simd_double4 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_cospi(simd_double8 x); +#endif + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_sinpi(simd_float2 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_sinpi(simd_float3 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_sinpi(simd_float4 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_sinpi(simd_float8 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_sinpi(simd_float16 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_sinpi(simd_double2 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_sinpi(simd_double3 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_sinpi(simd_double4 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_sinpi(simd_double8 x); +#endif + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_tanpi(simd_float2 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_tanpi(simd_float3 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_tanpi(simd_float4 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_tanpi(simd_float8 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_tanpi(simd_float16 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. 
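+ *
+ * Editorial note (not part of the original header): cospi(x), sinpi(x),
+ * and tanpi(x) compute cos(pi*x), sin(pi*x), and tan(pi*x) without first
+ * forming the rounded product pi*x, so they are exact where the
+ * mathematical result is exact. A sketch, assuming SIMD_LIBRARY_VERSION
+ * is at least 1:
+ *
+ *     simd_double2 x = { 0.5, 1.0 };
+ *     simd_double2 s = sinpi(x);    // { 1.0, 0.0 }: sin(pi/2), sin(pi)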
*/ +static inline SIMD_CFUNC simd_double2 __tg_tanpi(simd_double2 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_tanpi(simd_double3 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_tanpi(simd_double4 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_tanpi(simd_double8 x); +#endif + +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_acosh(simd_float2 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_acosh(simd_float3 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_acosh(simd_float4 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_acosh(simd_float8 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_acosh(simd_float16 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_acosh(simd_double2 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_acosh(simd_double3 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_acosh(simd_double4 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_acosh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_asinh(simd_float2 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_asinh(simd_float3 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_asinh(simd_float4 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_asinh(simd_float8 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_asinh(simd_float16 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_asinh(simd_double2 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. 
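+ *
+ * Editorial note (not part of the original header): the inverse hyperbolic
+ * functions keep their scalar domains per lane -- acosh needs x >= 1, and
+ * atanh is finite only for |x| < 1 -- so an out-of-domain input produces a
+ * NaN in that lane alone:
+ *
+ *     simd_double3 v = { 0.5, 1.0, 2.0 };
+ *     simd_double3 a = acosh(v);    // { NaN, 0.0, acosh(2.0) }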
*/ +static inline SIMD_CFUNC simd_double3 __tg_asinh(simd_double3 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_asinh(simd_double4 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_asinh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_atanh(simd_float2 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_atanh(simd_float3 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_atanh(simd_float4 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_atanh(simd_float8 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_atanh(simd_float16 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_atanh(simd_double2 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_atanh(simd_double3 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_atanh(simd_double4 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_atanh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_cosh(simd_float2 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_cosh(simd_float3 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_cosh(simd_float4 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_cosh(simd_float8 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_cosh(simd_float16 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_cosh(simd_double2 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_cosh(simd_double3 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_cosh(simd_double4 x); +/*! 
@abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_cosh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_sinh(simd_float2 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_sinh(simd_float3 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_sinh(simd_float4 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_sinh(simd_float8 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_sinh(simd_float16 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_sinh(simd_double2 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_sinh(simd_double3 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_sinh(simd_double4 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_sinh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_tanh(simd_float2 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_tanh(simd_float3 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_tanh(simd_float4 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_tanh(simd_float8 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_tanh(simd_float16 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_tanh(simd_double2 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_tanh(simd_double3 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_tanh(simd_double4 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_tanh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. 
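+ *
+ * Editorial sketch (not part of the original header): exp, exp2, and (when
+ * SIMD_LIBRARY_VERSION >= 1) exp10 apply e**x, 2**x, and 10**x per lane:
+ *
+ *     simd_float2 x = { 0.0f, 1.0f };
+ *     simd_float2 e = exp(x);    // { 1.0f, 2.7182817f }
+ *     simd_float2 p = exp2(x);   // { 1.0f, 2.0f }, exact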
*/ +static inline SIMD_CFUNC simd_float2 __tg_exp(simd_float2 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_exp(simd_float3 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_exp(simd_float4 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_exp(simd_float8 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_exp(simd_float16 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_exp(simd_double2 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_exp(simd_double3 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_exp(simd_double4 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_exp(simd_double8 x); + +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_exp2(simd_float2 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_exp2(simd_float3 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_exp2(simd_float4 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_exp2(simd_float8 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_exp2(simd_float16 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_exp2(simd_double2 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_exp2(simd_double3 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_exp2(simd_double4 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_exp2(simd_double8 x); + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_exp10(simd_float2 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_exp10(simd_float3 x); +/*! 
@abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_exp10(simd_float4 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_exp10(simd_float8 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_exp10(simd_float16 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_exp10(simd_double2 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_exp10(simd_double3 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_exp10(simd_double4 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_exp10(simd_double8 x); +#endif + +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_expm1(simd_float2 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_expm1(simd_float3 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_expm1(simd_float4 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_expm1(simd_float8 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_expm1(simd_float16 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_expm1(simd_double2 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_expm1(simd_double3 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_expm1(simd_double4 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_expm1(simd_double8 x); + +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_log(simd_float2 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_log(simd_float3 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_log(simd_float4 x); +/*! 
@abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_log(simd_float8 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_log(simd_float16 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_log(simd_double2 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_log(simd_double3 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_log(simd_double4 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_log(simd_double8 x); + +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_log2(simd_float2 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_log2(simd_float3 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_log2(simd_float4 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_log2(simd_float8 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_log2(simd_float16 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_log2(simd_double2 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_log2(simd_double3 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_log2(simd_double4 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_log2(simd_double8 x); + +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_log10(simd_float2 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_log10(simd_float3 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_log10(simd_float4 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_log10(simd_float8 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. 
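+ *
+ * Editorial note (not part of the original header): for arguments near
+ * zero, prefer log1p(x) over log(1+x); the addition 1+x discards the low
+ * bits of a tiny x before the logarithm is taken:
+ *
+ *     simd_float2 t = { 1e-8f, 1e-9f };
+ *     simd_float2 good = log1p(t);      // ~= t, tiny relative error
+ *     simd_float2 bad  = log(1.0f + t); // 1+t rounds to 1, result is 0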
*/ +static inline SIMD_CFUNC simd_float16 __tg_log10(simd_float16 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_log10(simd_double2 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_log10(simd_double3 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_log10(simd_double4 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_log10(simd_double8 x); + +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_log1p(simd_float2 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_log1p(simd_float3 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_log1p(simd_float4 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_log1p(simd_float8 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_log1p(simd_float16 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_log1p(simd_double2 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_log1p(simd_double3 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_log1p(simd_double4 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_log1p(simd_double8 x); + +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fabs(simd_float2 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fabs(simd_float3 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fabs(simd_float4 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fabs(simd_float8 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fabs(simd_float16 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fabs(simd_double2 x); +/*! 
@abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fabs(simd_double3 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fabs(simd_double4 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fabs(simd_double8 x); + +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_cbrt(simd_float2 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_cbrt(simd_float3 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_cbrt(simd_float4 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_cbrt(simd_float8 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_cbrt(simd_float16 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_cbrt(simd_double2 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_cbrt(simd_double3 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_cbrt(simd_double4 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_cbrt(simd_double8 x); + +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_sqrt(simd_float2 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_sqrt(simd_float3 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_sqrt(simd_float4 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_sqrt(simd_float8 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_sqrt(simd_float16 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_sqrt(simd_double2 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_sqrt(simd_double3 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. 
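+ *
+ * Editorial sketch (not part of the original header): sqrt operates per
+ * lane, and a negative input yields NaN only in its own lane:
+ *
+ *     simd_double4 v = { 4.0, 0.25, -1.0, 2.0 };
+ *     simd_double4 r = sqrt(v);     // { 2.0, 0.5, NaN, sqrt(2.0) }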
*/ +static inline SIMD_CFUNC simd_double4 __tg_sqrt(simd_double4 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_sqrt(simd_double8 x); + +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_erf(simd_float2 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_erf(simd_float3 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_erf(simd_float4 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_erf(simd_float8 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_erf(simd_float16 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_erf(simd_double2 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_erf(simd_double3 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_erf(simd_double4 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_erf(simd_double8 x); + +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_erfc(simd_float2 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_erfc(simd_float3 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_erfc(simd_float4 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_erfc(simd_float8 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_erfc(simd_float16 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_erfc(simd_double2 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_erfc(simd_double3 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_erfc(simd_double4 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_erfc(simd_double8 x); + +/*! 
@abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_tgamma(simd_float2 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_tgamma(simd_float3 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_tgamma(simd_float4 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_tgamma(simd_float8 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_tgamma(simd_float16 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_tgamma(simd_double2 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_tgamma(simd_double3 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_tgamma(simd_double4 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_tgamma(simd_double8 x); + +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_ceil(simd_float2 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_ceil(simd_float3 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_ceil(simd_float4 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_ceil(simd_float8 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_ceil(simd_float16 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_ceil(simd_double2 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_ceil(simd_double3 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_ceil(simd_double4 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_ceil(simd_double8 x); + +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_floor(simd_float2 x); +/*! 
@abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_floor(simd_float3 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_floor(simd_float4 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_floor(simd_float8 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_floor(simd_float16 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_floor(simd_double2 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_floor(simd_double3 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_floor(simd_double4 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_floor(simd_double8 x); + +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_rint(simd_float2 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_rint(simd_float3 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_rint(simd_float4 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_rint(simd_float8 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_rint(simd_float16 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_rint(simd_double2 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_rint(simd_double3 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_rint(simd_double4 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_rint(simd_double8 x); + +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_round(simd_float2 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_round(simd_float3 x); +/*! 
@abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_round(simd_float4 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_round(simd_float8 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_round(simd_float16 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_round(simd_double2 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_round(simd_double3 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_round(simd_double4 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_round(simd_double8 x); + +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_trunc(simd_float2 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_trunc(simd_float3 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_trunc(simd_float4 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_trunc(simd_float8 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_trunc(simd_float16 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_trunc(simd_double2 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_trunc(simd_double3 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_trunc(simd_double4 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_trunc(simd_double8 x); + + +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_atan2(simd_float2 y, simd_float2 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_atan2(simd_float3 y, simd_float3 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_atan2(simd_float4 y, simd_float4 x); +/*! 
@abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_atan2(simd_float8 y, simd_float8 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_atan2(simd_float16 y, simd_float16 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_atan2(simd_double2 y, simd_double2 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_atan2(simd_double3 y, simd_double3 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_atan2(simd_double4 y, simd_double4 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_atan2(simd_double8 y, simd_double8 x); + +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_hypot(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_hypot(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_hypot(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_hypot(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_hypot(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_hypot(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_hypot(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_hypot(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_hypot(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_pow(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_pow(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. 
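+ *
+ * Editorial note (not part of the original header): the two-argument
+ * functions follow their scalar <math.h> conventions per lane -- atan2
+ * takes (y, x) in that order, and hypot(x, y) is sqrt(x*x + y*y) computed
+ * without undue overflow or underflow:
+ *
+ *     simd_float4 x  = { 1.0f, 0.0f, -1.0f, 1.0f };
+ *     simd_float4 y  = { 0.0f, 1.0f,  0.0f, 1.0f };
+ *     simd_float4 th = atan2(y, x);  // { 0, pi/2, pi, pi/4 }
+ *     simd_float4 r  = hypot(x, y);  // { 1, 1, 1, sqrt(2) }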
*/ +static inline SIMD_CFUNC simd_float4 __tg_pow(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_pow(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_pow(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_pow(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_pow(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_pow(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_pow(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fmod(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fmod(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fmod(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fmod(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fmod(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fmod(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fmod(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fmod(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fmod(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_remainder(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_remainder(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. 
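+ *
+ * Editorial note (not part of the original header): fmod and remainder
+ * round the quotient differently -- fmod(x,y) is x - trunc(x/y)*y and
+ * keeps the sign of x, while remainder(x,y) rounds x/y to the nearest
+ * integer, so its result lies in [-|y|/2, +|y|/2]:
+ *
+ *     simd_float4 x = { 5.5f, -5.5f, 7.0f, 8.0f };
+ *     simd_float4 y = { 2.0f,  2.0f, 3.0f, 3.0f };
+ *     // fmod(x, y)      == {  1.5f, -1.5f, 1.0f,  2.0f }
+ *     // remainder(x, y) == { -0.5f,  0.5f, 1.0f, -1.0f }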
*/ +static inline SIMD_CFUNC simd_float4 __tg_remainder(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_remainder(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_remainder(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_remainder(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_remainder(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_remainder(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_remainder(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_copysign(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_copysign(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_copysign(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_copysign(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_copysign(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_copysign(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_copysign(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_copysign(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_copysign(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_nextafter(simd_float2 x, simd_float2 y); +/*! 
@abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_nextafter(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_nextafter(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_nextafter(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_nextafter(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_nextafter(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_nextafter(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_nextafter(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_nextafter(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fdim(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fdim(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fdim(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fdim(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fdim(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fdim(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fdim(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fdim(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fdim(simd_double8 x, simd_double8 y); + +/*! 
@abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fmax(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fmax(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fmax(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fmax(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fmax(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fmax(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fmax(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fmax(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fmax(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fmin(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fmin(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fmin(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fmin(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fmin(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fmin(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fmin(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fmin(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fmin(simd_double8 x, simd_double8 y); + + +/*! 
@abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fma(simd_float2 x, simd_float2 y, simd_float2 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fma(simd_float3 x, simd_float3 y, simd_float3 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fma(simd_float4 x, simd_float4 y, simd_float4 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fma(simd_float8 x, simd_float8 y, simd_float8 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fma(simd_float16 x, simd_float16 y, simd_float16 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fma(simd_double2 x, simd_double2 y, simd_double2 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fma(simd_double3 x, simd_double3 y, simd_double3 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fma(simd_double4 x, simd_double4 y, simd_double4 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fma(simd_double8 x, simd_double8 y, simd_double8 z); + +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC float simd_muladd(float x, float y, float z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float2 simd_muladd(simd_float2 x, simd_float2 y, simd_float2 z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float3 simd_muladd(simd_float3 x, simd_float3 y, simd_float3 z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float4 simd_muladd(simd_float4 x, simd_float4 y, simd_float4 z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float8 simd_muladd(simd_float8 x, simd_float8 y, simd_float8 z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float16 simd_muladd(simd_float16 x, simd_float16 y, simd_float16 z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC double simd_muladd(double x, double y, double z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_double2 simd_muladd(simd_double2 x, simd_double2 y, simd_double2 z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_double3 simd_muladd(simd_double3 x, simd_double3 y, simd_double3 z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_double4 simd_muladd(simd_double4 x, simd_double4 y, simd_double4 z); +/*! @abstract Computes x*y + z by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_double8 simd_muladd(simd_double8 x, simd_double8 y, simd_double8 z);
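+
+/* Usage sketch (illustrative; the helper below is not part of this header):
+ * simd_muladd(x, y, z) evaluates x*y + z in each lane, so a vector linear
+ * interpolation can be written as
+ *
+ *     static simd_float4 lerp4(simd_float4 a, simd_float4 b, simd_float4 t) {
+ *       return simd_muladd(b - a, t, a);   // a + (b - a)*t
+ *     }
+ *
+ * Unlike fma, which always fuses, simd_muladd may round the multiply and the
+ * add separately on targets without a fused multiply-add instruction. */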
+ +#ifdef __cplusplus +} /* extern "C" */ + +#include <cmath> +/*! @abstract Do not call this function directly; use simd::acos instead. */ +static SIMD_CPPFUNC float __tg_acos(float x) { return ::acos(x); } +/*! @abstract Do not call this function directly; use simd::acos instead. */ +static SIMD_CPPFUNC double __tg_acos(double x) { return ::acos(x); } +/*! @abstract Do not call this function directly; use simd::asin instead. */ +static SIMD_CPPFUNC float __tg_asin(float x) { return ::asin(x); } +/*! @abstract Do not call this function directly; use simd::asin instead. */ +static SIMD_CPPFUNC double __tg_asin(double x) { return ::asin(x); } +/*! @abstract Do not call this function directly; use simd::atan instead. */ +static SIMD_CPPFUNC float __tg_atan(float x) { return ::atan(x); } +/*! @abstract Do not call this function directly; use simd::atan instead. */ +static SIMD_CPPFUNC double __tg_atan(double x) { return ::atan(x); } +/*! @abstract Do not call this function directly; use simd::cos instead. */ +static SIMD_CPPFUNC float __tg_cos(float x) { return ::cos(x); } +/*! @abstract Do not call this function directly; use simd::cos instead. */ +static SIMD_CPPFUNC double __tg_cos(double x) { return ::cos(x); } +/*! @abstract Do not call this function directly; use simd::sin instead. */ +static SIMD_CPPFUNC float __tg_sin(float x) { return ::sin(x); } +/*! @abstract Do not call this function directly; use simd::sin instead. */ +static SIMD_CPPFUNC double __tg_sin(double x) { return ::sin(x); } +/*! @abstract Do not call this function directly; use simd::tan instead. */ +static SIMD_CPPFUNC float __tg_tan(float x) { return ::tan(x); } +/*! @abstract Do not call this function directly; use simd::tan instead. */ +static SIMD_CPPFUNC double __tg_tan(double x) { return ::tan(x); } +/*! @abstract Do not call this function directly; use simd::cospi instead. */ +static SIMD_CPPFUNC float __tg_cospi(float x) { return ::__cospi(x); } +/*! @abstract Do not call this function directly; use simd::cospi instead. */ +static SIMD_CPPFUNC double __tg_cospi(double x) { return ::__cospi(x); } +/*! @abstract Do not call this function directly; use simd::sinpi instead. */ +static SIMD_CPPFUNC float __tg_sinpi(float x) { return ::__sinpi(x); } +/*! @abstract Do not call this function directly; use simd::sinpi instead. */ +static SIMD_CPPFUNC double __tg_sinpi(double x) { return ::__sinpi(x); } +/*!
@abstract Do not call this function directly; use simd::tanpi instead. */ +static SIMD_CPPFUNC float __tg_tanpi(float x) { return ::__tanpi(x); } +/*! @abstract Do not call this function directly; use simd::tanpi instead. */ +static SIMD_CPPFUNC double __tg_tanpi(double x) { return ::__tanpi(x); } +/*! @abstract Do not call this function directly; use simd::acosh instead. */ +static SIMD_CPPFUNC float __tg_acosh(float x) { return ::acosh(x); } +/*! @abstract Do not call this function directly; use simd::acosh instead. */ +static SIMD_CPPFUNC double __tg_acosh(double x) { return ::acosh(x); } +/*! @abstract Do not call this function directly; use simd::asinh instead. */ +static SIMD_CPPFUNC float __tg_asinh(float x) { return ::asinh(x); } +/*! @abstract Do not call this function directly; use simd::asinh instead. */ +static SIMD_CPPFUNC double __tg_asinh(double x) { return ::asinh(x); } +/*! @abstract Do not call this function directly; use simd::atanh instead. */ +static SIMD_CPPFUNC float __tg_atanh(float x) { return ::atanh(x); } +/*! @abstract Do not call this function directly; use simd::atanh instead. */ +static SIMD_CPPFUNC double __tg_atanh(double x) { return ::atanh(x); } +/*! @abstract Do not call this function directly; use simd::cosh instead. */ +static SIMD_CPPFUNC float __tg_cosh(float x) { return ::cosh(x); } +/*! @abstract Do not call this function directly; use simd::cosh instead. */ +static SIMD_CPPFUNC double __tg_cosh(double x) { return ::cosh(x); } +/*! @abstract Do not call this function directly; use simd::sinh instead. */ +static SIMD_CPPFUNC float __tg_sinh(float x) { return ::sinh(x); } +/*! @abstract Do not call this function directly; use simd::sinh instead. */ +static SIMD_CPPFUNC double __tg_sinh(double x) { return ::sinh(x); } +/*! @abstract Do not call this function directly; use simd::tanh instead. */ +static SIMD_CPPFUNC float __tg_tanh(float x) { return ::tanh(x); } +/*! @abstract Do not call this function directly; use simd::tanh instead. */ +static SIMD_CPPFUNC double __tg_tanh(double x) { return ::tanh(x); } +/*! @abstract Do not call this function directly; use simd::exp instead. */ +static SIMD_CPPFUNC float __tg_exp(float x) { return ::exp(x); } +/*! @abstract Do not call this function directly; use simd::exp instead. */ +static SIMD_CPPFUNC double __tg_exp(double x) { return ::exp(x); } +/*! @abstract Do not call this function directly; use simd::exp2 instead. */ +static SIMD_CPPFUNC float __tg_exp2(float x) { return ::exp2(x); } +/*! @abstract Do not call this function directly; use simd::exp2 instead. */ +static SIMD_CPPFUNC double __tg_exp2(double x) { return ::exp2(x); } +/*! @abstract Do not call this function directly; use simd::exp10 instead. */ +static SIMD_CPPFUNC float __tg_exp10(float x) { return ::__exp10(x); } +/*! @abstract Do not call this function directly; use simd::exp10 instead. */ +static SIMD_CPPFUNC double __tg_exp10(double x) { return ::__exp10(x); } +/*! @abstract Do not call this function directly; use simd::expm1 instead. */ +static SIMD_CPPFUNC float __tg_expm1(float x) { return ::expm1(x); } +/*! @abstract Do not call this function directly; use simd::expm1 instead. */ +static SIMD_CPPFUNC double __tg_expm1(double x) { return ::expm1(x); } +/*! @abstract Do not call this function directly; use simd::log instead. */ +static SIMD_CPPFUNC float __tg_log(float x) { return ::log(x); } +/*! @abstract Do not call this function directly; use simd::log instead. 
*/ +static SIMD_CPPFUNC double __tg_log(double x) { return ::log(x); } +/*! @abstract Do not call this function directly; use simd::log2 instead. */ +static SIMD_CPPFUNC float __tg_log2(float x) { return ::log2(x); } +/*! @abstract Do not call this function directly; use simd::log2 instead. */ +static SIMD_CPPFUNC double __tg_log2(double x) { return ::log2(x); } +/*! @abstract Do not call this function directly; use simd::log10 instead. */ +static SIMD_CPPFUNC float __tg_log10(float x) { return ::log10(x); } +/*! @abstract Do not call this function directly; use simd::log10 instead. */ +static SIMD_CPPFUNC double __tg_log10(double x) { return ::log10(x); } +/*! @abstract Do not call this function directly; use simd::log1p instead. */ +static SIMD_CPPFUNC float __tg_log1p(float x) { return ::log1p(x); } +/*! @abstract Do not call this function directly; use simd::log1p instead. */ +static SIMD_CPPFUNC double __tg_log1p(double x) { return ::log1p(x); } +/*! @abstract Do not call this function directly; use simd::fabs instead. */ +static SIMD_CPPFUNC float __tg_fabs(float x) { return ::fabs(x); } +/*! @abstract Do not call this function directly; use simd::fabs instead. */ +static SIMD_CPPFUNC double __tg_fabs(double x) { return ::fabs(x); } +/*! @abstract Do not call this function directly; use simd::cbrt instead. */ +static SIMD_CPPFUNC float __tg_cbrt(float x) { return ::cbrt(x); } +/*! @abstract Do not call this function directly; use simd::cbrt instead. */ +static SIMD_CPPFUNC double __tg_cbrt(double x) { return ::cbrt(x); } +/*! @abstract Do not call this function directly; use simd::sqrt instead. */ +static SIMD_CPPFUNC float __tg_sqrt(float x) { return ::sqrt(x); } +/*! @abstract Do not call this function directly; use simd::sqrt instead. */ +static SIMD_CPPFUNC double __tg_sqrt(double x) { return ::sqrt(x); } +/*! @abstract Do not call this function directly; use simd::erf instead. */ +static SIMD_CPPFUNC float __tg_erf(float x) { return ::erf(x); } +/*! @abstract Do not call this function directly; use simd::erf instead. */ +static SIMD_CPPFUNC double __tg_erf(double x) { return ::erf(x); } +/*! @abstract Do not call this function directly; use simd::erfc instead. */ +static SIMD_CPPFUNC float __tg_erfc(float x) { return ::erfc(x); } +/*! @abstract Do not call this function directly; use simd::erfc instead. */ +static SIMD_CPPFUNC double __tg_erfc(double x) { return ::erfc(x); } +/*! @abstract Do not call this function directly; use simd::tgamma instead. */ +static SIMD_CPPFUNC float __tg_tgamma(float x) { return ::tgamma(x); } +/*! @abstract Do not call this function directly; use simd::tgamma instead. */ +static SIMD_CPPFUNC double __tg_tgamma(double x) { return ::tgamma(x); } +/*! @abstract Do not call this function directly; use simd::ceil instead. */ +static SIMD_CPPFUNC float __tg_ceil(float x) { return ::ceil(x); } +/*! @abstract Do not call this function directly; use simd::ceil instead. */ +static SIMD_CPPFUNC double __tg_ceil(double x) { return ::ceil(x); } +/*! @abstract Do not call this function directly; use simd::floor instead. */ +static SIMD_CPPFUNC float __tg_floor(float x) { return ::floor(x); } +/*! @abstract Do not call this function directly; use simd::floor instead. */ +static SIMD_CPPFUNC double __tg_floor(double x) { return ::floor(x); } +/*! @abstract Do not call this function directly; use simd::rint instead. */ +static SIMD_CPPFUNC float __tg_rint(float x) { return ::rint(x); } +/*! @abstract Do not call this function directly; use simd::rint instead. 
*/ +static SIMD_CPPFUNC double __tg_rint(double x) { return ::rint(x); } +/*! @abstract Do not call this function directly; use simd::round instead. */ +static SIMD_CPPFUNC float __tg_round(float x) { return ::round(x); } +/*! @abstract Do not call this function directly; use simd::round instead. */ +static SIMD_CPPFUNC double __tg_round(double x) { return ::round(x); } +/*! @abstract Do not call this function directly; use simd::trunc instead. */ +static SIMD_CPPFUNC float __tg_trunc(float x) { return ::trunc(x); } +/*! @abstract Do not call this function directly; use simd::trunc instead. */ +static SIMD_CPPFUNC double __tg_trunc(double x) { return ::trunc(x); } +/*! @abstract Do not call this function directly; use simd::atan2 instead. */ +static SIMD_CPPFUNC float __tg_atan2(float x, float y) { return ::atan2(x, y); } +/*! @abstract Do not call this function directly; use simd::atan2 instead. */ +static SIMD_CPPFUNC double __tg_atan2(double x, double y) { return ::atan2(x, y); } +/*! @abstract Do not call this function directly; use simd::hypot instead. */ +static SIMD_CPPFUNC float __tg_hypot(float x, float y) { return ::hypot(x, y); } +/*! @abstract Do not call this function directly; use simd::hypot instead. */ +static SIMD_CPPFUNC double __tg_hypot(double x, double y) { return ::hypot(x, y); } +/*! @abstract Do not call this function directly; use simd::pow instead. */ +static SIMD_CPPFUNC float __tg_pow(float x, float y) { return ::pow(x, y); } +/*! @abstract Do not call this function directly; use simd::pow instead. */ +static SIMD_CPPFUNC double __tg_pow(double x, double y) { return ::pow(x, y); } +/*! @abstract Do not call this function directly; use simd::fmod instead. */ +static SIMD_CPPFUNC float __tg_fmod(float x, float y) { return ::fmod(x, y); } +/*! @abstract Do not call this function directly; use simd::fmod instead. */ +static SIMD_CPPFUNC double __tg_fmod(double x, double y) { return ::fmod(x, y); } +/*! @abstract Do not call this function directly; use simd::remainder + * instead. */ +static SIMD_CPPFUNC float __tg_remainder(float x, float y) { return ::remainder(x, y); } +/*! @abstract Do not call this function directly; use simd::remainder + * instead. */ +static SIMD_CPPFUNC double __tg_remainder(double x, double y) { return ::remainder(x, y); } +/*! @abstract Do not call this function directly; use simd::copysign + * instead. */ +static SIMD_CPPFUNC float __tg_copysign(float x, float y) { return ::copysign(x, y); } +/*! @abstract Do not call this function directly; use simd::copysign + * instead. */ +static SIMD_CPPFUNC double __tg_copysign(double x, double y) { return ::copysign(x, y); } +/*! @abstract Do not call this function directly; use simd::nextafter + * instead. */ +static SIMD_CPPFUNC float __tg_nextafter(float x, float y) { return ::nextafter(x, y); } +/*! @abstract Do not call this function directly; use simd::nextafter + * instead. */ +static SIMD_CPPFUNC double __tg_nextafter(double x, double y) { return ::nextafter(x, y); } +/*! @abstract Do not call this function directly; use simd::fdim instead. */ +static SIMD_CPPFUNC float __tg_fdim(float x, float y) { return ::fdim(x, y); } +/*! @abstract Do not call this function directly; use simd::fdim instead. */ +static SIMD_CPPFUNC double __tg_fdim(double x, double y) { return ::fdim(x, y); } +/*! @abstract Do not call this function directly; use simd::fmax instead. */ +static SIMD_CPPFUNC float __tg_fmax(float x, float y) { return ::fmax(x, y); } +/*! @abstract Do not call this function directly; use simd::fmax instead. */ +static SIMD_CPPFUNC double __tg_fmax(double x, double y) { return ::fmax(x, y); } +/*! @abstract Do not call this function directly; use simd::fmin instead. */ +static SIMD_CPPFUNC float __tg_fmin(float x, float y) { return ::fmin(x, y); } +/*! @abstract Do not call this function directly; use simd::fmin instead. */ +static SIMD_CPPFUNC double __tg_fmin(double x, double y) { return ::fmin(x, y); } +/*! @abstract Do not call this function directly; use simd::fma instead. */ +static SIMD_CPPFUNC float __tg_fma(float x, float y, float z) { return ::fma(x, y, z); } +/*! @abstract Do not call this function directly; use simd::fma instead. */ +static SIMD_CPPFUNC double __tg_fma(double x, double y, double z) { return ::fma(x, y, z); }
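+
+/* Usage sketch (illustrative only): these scalar wrappers let the simd::
+ * templates below accept scalars as well as vectors, while the vector
+ * overloads declared earlier handle the vector case, e.g.
+ *
+ *     simd_double4 x = { 1.0, -2.0, NAN, 4.0 };
+ *     simd_double4 y = { 0.0,  3.0, 5.0, NAN };
+ *     simd_double4 m = simd::fmax(x, y);   // { 1, 3, 5, 4 }: a NaN lane
+ *                                          // yields the other operand
+ */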
+ +namespace simd { +/*! @abstract Generalizes the <cmath> function acos to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN acos(fptypeN x) { return ::__tg_acos(x); } + +/*! @abstract Generalizes the <cmath> function asin to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN asin(fptypeN x) { return ::__tg_asin(x); } + +/*! @abstract Generalizes the <cmath> function atan to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN atan(fptypeN x) { return ::__tg_atan(x); } + +/*! @abstract Generalizes the <cmath> function cos to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN cos(fptypeN x) { return ::__tg_cos(x); } + +/*! @abstract Generalizes the <cmath> function sin to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN sin(fptypeN x) { return ::__tg_sin(x); } + +/*! @abstract Generalizes the <cmath> function tan to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN tan(fptypeN x) { return ::__tg_tan(x); } + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Generalizes the <cmath> function cospi to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN cospi(fptypeN x) { return ::__tg_cospi(x); } +#endif + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Generalizes the <cmath> function sinpi to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN sinpi(fptypeN x) { return ::__tg_sinpi(x); } +#endif + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Generalizes the <cmath> function tanpi to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN tanpi(fptypeN x) { return ::__tg_tanpi(x); } +#endif + +/*! @abstract Generalizes the <cmath> function acosh to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN acosh(fptypeN x) { return ::__tg_acosh(x); } + +/*! @abstract Generalizes the <cmath> function asinh to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN asinh(fptypeN x) { return ::__tg_asinh(x); } + +/*! @abstract Generalizes the <cmath> function atanh to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN atanh(fptypeN x) { return ::__tg_atanh(x); } + +/*!
@abstract Generalizes the <cmath> function cosh to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN cosh(fptypeN x) { return ::__tg_cosh(x); } + +/*! @abstract Generalizes the <cmath> function sinh to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN sinh(fptypeN x) { return ::__tg_sinh(x); } + +/*! @abstract Generalizes the <cmath> function tanh to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN tanh(fptypeN x) { return ::__tg_tanh(x); } + +/*! @abstract Generalizes the <cmath> function exp to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN exp(fptypeN x) { return ::__tg_exp(x); } + +/*! @abstract Generalizes the <cmath> function exp2 to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN exp2(fptypeN x) { return ::__tg_exp2(x); } + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Generalizes the <cmath> function exp10 to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN exp10(fptypeN x) { return ::__tg_exp10(x); } +#endif + +/*! @abstract Generalizes the <cmath> function expm1 to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN expm1(fptypeN x) { return ::__tg_expm1(x); } + +/*! @abstract Generalizes the <cmath> function log to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN log(fptypeN x) { return ::__tg_log(x); } + +/*! @abstract Generalizes the <cmath> function log2 to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN log2(fptypeN x) { return ::__tg_log2(x); } + +/*! @abstract Generalizes the <cmath> function log10 to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN log10(fptypeN x) { return ::__tg_log10(x); } + +/*! @abstract Generalizes the <cmath> function log1p to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN log1p(fptypeN x) { return ::__tg_log1p(x); } + +/*! @abstract Generalizes the <cmath> function fabs to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN fabs(fptypeN x) { return ::__tg_fabs(x); } + +/*! @abstract Generalizes the <cmath> function cbrt to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN cbrt(fptypeN x) { return ::__tg_cbrt(x); } + +/*! @abstract Generalizes the <cmath> function sqrt to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN sqrt(fptypeN x) { return ::__tg_sqrt(x); } + +/*! @abstract Generalizes the <cmath> function erf to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN erf(fptypeN x) { return ::__tg_erf(x); } + +/*! @abstract Generalizes the <cmath> function erfc to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN erfc(fptypeN x) { return ::__tg_erfc(x); } + +/*! @abstract Generalizes the <cmath> function tgamma to operate on vectors + * of floats and doubles. 
*/ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN tgamma(fptypeN x) { return ::__tg_tgamma(x); } + +/*! @abstract Generalizes the <cmath> function ceil to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN ceil(fptypeN x) { return ::__tg_ceil(x); } + +/*! @abstract Generalizes the <cmath> function floor to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN floor(fptypeN x) { return ::__tg_floor(x); } + +/*! @abstract Generalizes the <cmath> function rint to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN rint(fptypeN x) { return ::__tg_rint(x); } + +/*! @abstract Generalizes the <cmath> function round to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN round(fptypeN x) { return ::__tg_round(x); } + +/*! @abstract Generalizes the <cmath> function trunc to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN trunc(fptypeN x) { return ::__tg_trunc(x); } + +/*! @abstract Generalizes the <cmath> function atan2 to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN atan2(fptypeN y, fptypeN x) { return ::__tg_atan2(y, x); } + +/*! @abstract Generalizes the <cmath> function hypot to operate on vectors + * of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN hypot(fptypeN x, fptypeN y) { return ::__tg_hypot(x, y); } + +/*! @abstract Generalizes the <cmath> function pow to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN pow(fptypeN x, fptypeN y) { return ::__tg_pow(x, y); } + +/*! @abstract Generalizes the <cmath> function fmod to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN fmod(fptypeN x, fptypeN y) { return ::__tg_fmod(x, y); } + +/*! @abstract Generalizes the <cmath> function remainder to operate on + * vectors of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN remainder(fptypeN x, fptypeN y) { return ::__tg_remainder(x, y); } + +/*! @abstract Generalizes the <cmath> function copysign to operate on + * vectors of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN copysign(fptypeN x, fptypeN y) { return ::__tg_copysign(x, y); } + +/*! @abstract Generalizes the <cmath> function nextafter to operate on + * vectors of floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN nextafter(fptypeN x, fptypeN y) { return ::__tg_nextafter(x, y); } + +/*! @abstract Generalizes the <cmath> function fdim to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN fdim(fptypeN x, fptypeN y) { return ::__tg_fdim(x, y); } + +/*! @abstract Generalizes the <cmath> function fmax to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN fmax(fptypeN x, fptypeN y) { return ::__tg_fmax(x, y); } + +/*! @abstract Generalizes the <cmath> function fmin to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN fmin(fptypeN x, fptypeN y) { return ::__tg_fmin(x, y); } + +/*! 
@abstract Generalizes the <cmath> function fma to operate on vectors of + * floats and doubles. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN fma(fptypeN x, fptypeN y, fptypeN z) { return ::__tg_fma(x, y, z); } + +/*! @abstract Computes x*y + z by the most efficient means available; either + * a fused multiply add or separate multiply and add. */ + template <typename fptypeN> + static SIMD_CPPFUNC fptypeN muladd(fptypeN x, fptypeN y, fptypeN z) { return ::simd_muladd(x, y, z); } +}; + +extern "C" { +#else +#include <tgmath.h> +/* C and Objective-C, we need some infrastructure to piggyback on tgmath.h */ +static SIMD_OVERLOAD simd_float2 __tg_promote(simd_float2); +static SIMD_OVERLOAD simd_float3 __tg_promote(simd_float3); +static SIMD_OVERLOAD simd_float4 __tg_promote(simd_float4); +static SIMD_OVERLOAD simd_float8 __tg_promote(simd_float8); +static SIMD_OVERLOAD simd_float16 __tg_promote(simd_float16); +static SIMD_OVERLOAD simd_double2 __tg_promote(simd_double2); +static SIMD_OVERLOAD simd_double3 __tg_promote(simd_double3); +static SIMD_OVERLOAD simd_double4 __tg_promote(simd_double4); +static SIMD_OVERLOAD simd_double8 __tg_promote(simd_double8); + +/* Apple extensions to <math.h>, added in macOS 10.9 and iOS 7.0 */ +#if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_9 || \ + __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0 || \ + __DRIVERKIT_VERSION_MIN_REQUIRED >= __DRIVERKIT_19_0 +static inline SIMD_CFUNC float __tg_cospi(float x) { return __cospif(x); } +static inline SIMD_CFUNC double __tg_cospi(double x) { return __cospi(x); } +#undef cospi +/*! @abstract `cospi(x)` computes `cos(pi * x)` without intermediate rounding. + * + * @discussion Both faster and more accurate than multiplying by `pi` and then + * calling `cos`. Defined for `float` and `double` as well as vectors of + * floats and doubles as provided by `<simd/simd.h>`. */ +#define cospi(__x) __tg_cospi(__tg_promote1((__x))(__x)) + +static inline SIMD_CFUNC float __tg_sinpi(float x) { return __sinpif(x); } +static inline SIMD_CFUNC double __tg_sinpi(double x) { return __sinpi(x); } +#undef sinpi +/*! @abstract `sinpi(x)` computes `sin(pi * x)` without intermediate rounding. + * + * @discussion Both faster and more accurate than multiplying by `pi` and then + * calling `sin`. Defined for `float` and `double` as well as vectors + * of floats and doubles as provided by `<simd/simd.h>`. */ +#define sinpi(__x) __tg_sinpi(__tg_promote1((__x))(__x)) + +static inline SIMD_CFUNC float __tg_tanpi(float x) { return __tanpif(x); } +static inline SIMD_CFUNC double __tg_tanpi(double x) { return __tanpi(x); } +#undef tanpi +/*! @abstract `tanpi(x)` computes `tan(pi * x)` without intermediate rounding. + * + * @discussion Both faster and more accurate than multiplying by `pi` and then + * calling `tan`. Defined for `float` and `double` as well as vectors of + * floats and doubles as provided by `<simd/simd.h>`. */ +#define tanpi(__x) __tg_tanpi(__tg_promote1((__x))(__x)) + +static inline SIMD_CFUNC float __tg_exp10(float x) { return __exp10f(x); } +static inline SIMD_CFUNC double __tg_exp10(double x) { return __exp10(x); } +#undef exp10 +/*! @abstract `exp10(x)` computes `10**x` more efficiently and accurately + * than `pow(10, x)`. + * + * @discussion Defined for `float` and `double` as well as vectors of floats + * and doubles as provided by `<simd/simd.h>`. 
*/ +#define exp10(__x) __tg_exp10(__tg_promote1((__x))(__x)) +#endif + + +#endif /* !__cplusplus */ + +#pragma mark - fabs implementation +static inline SIMD_CFUNC simd_float2 __tg_fabs(simd_float2 x) { return simd_bitselect(0.0, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float3 __tg_fabs(simd_float3 x) { return simd_bitselect(0.0, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float4 __tg_fabs(simd_float4 x) { return simd_bitselect(0.0, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float8 __tg_fabs(simd_float8 x) { return simd_bitselect(0.0, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float16 __tg_fabs(simd_float16 x) { return simd_bitselect(0.0, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_double2 __tg_fabs(simd_double2 x) { return simd_bitselect(0.0, x, 0x7fffffffffffffffL); } +static inline SIMD_CFUNC simd_double3 __tg_fabs(simd_double3 x) { return simd_bitselect(0.0, x, 0x7fffffffffffffffL); } +static inline SIMD_CFUNC simd_double4 __tg_fabs(simd_double4 x) { return simd_bitselect(0.0, x, 0x7fffffffffffffffL); } +static inline SIMD_CFUNC simd_double8 __tg_fabs(simd_double8 x) { return simd_bitselect(0.0, x, 0x7fffffffffffffffL); } + +#pragma mark - fmin, fmax implementation +static SIMD_CFUNC simd_float2 __tg_fmin(simd_float2 x, simd_float2 y) { +#if defined __SSE2__ + return simd_make_float2(__tg_fmin(simd_make_float4_undef(x), simd_make_float4_undef(y))); +#elif defined __arm64__ + return vminnm_f32(x, y); +#elif defined __arm__ && __FINITE_MATH_ONLY__ + return vmin_f32(x, y); +#else + return simd_bitselect(y, x, (x <= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_fmin(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_fmin(simd_make_float4_undef(x), simd_make_float4_undef(y))); +} + +static SIMD_CFUNC simd_float4 __tg_fmin(simd_float4 x, simd_float4 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ && !__FINITE_MATH_ONLY__ + return _mm_range_ps(x, y, 4); +#elif defined __SSE2__ && __FINITE_MATH_ONLY__ + return _mm_min_ps(x, y); +#elif defined __SSE2__ + return simd_bitselect(_mm_min_ps(x, y), x, y != y); +#elif defined __arm64__ + return vminnmq_f32(x, y); +#elif defined __arm__ && __FINITE_MATH_ONLY__ + return vminq_f32(x, y); +#else + return simd_bitselect(y, x, (x <= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_fmin(simd_float8 x, simd_float8 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ && !__FINITE_MATH_ONLY__ + return _mm256_range_ps(x, y, 4); +#elif defined __AVX__ && __FINITE_MATH_ONLY__ + return _mm256_min_ps(x, y); +#elif defined __AVX__ + return simd_bitselect(_mm256_min_ps(x, y), x, y != y); +#else + return simd_make_float8(__tg_fmin(x.lo, y.lo), __tg_fmin(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_fmin(simd_float16 x, simd_float16 y) { +#if defined __x86_64__ && defined __AVX512DQ__ && !__FINITE_MATH_ONLY__ + return _mm512_range_ps(x, y, 4); +#elif defined __x86_64__ && defined __AVX512F__ && __FINITE_MATH_ONLY__ + return _mm512_min_ps(x, y); +#elif defined __x86_64__ && defined __AVX512F__ + return simd_bitselect(_mm512_min_ps(x, y), x, y != y); +#else + return simd_make_float16(__tg_fmin(x.lo, y.lo), __tg_fmin(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_double2 __tg_fmin(simd_double2 x, simd_double2 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ + return _mm_range_pd(x, y, 4); +#elif defined __SSE2__ && __FINITE_MATH_ONLY__ + return _mm_min_pd(x, y); +#elif defined __SSE2__ + return simd_bitselect(_mm_min_pd(x, y), x, y != y); +#elif defined 
__arm64__ + return vminnmq_f64(x, y); +#else + return simd_bitselect(y, x, (x <= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_fmin(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_fmin(simd_make_double4_undef(x), simd_make_double4_undef(y))); +} + +static SIMD_CFUNC simd_double4 __tg_fmin(simd_double4 x, simd_double4 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ + return _mm256_range_pd(x, y, 4); +#elif defined __AVX__ && __FINITE_MATH_ONLY__ + return _mm256_min_pd(x, y); +#elif defined __AVX__ + return simd_bitselect(_mm256_min_pd(x, y), x, y != y); +#else + return simd_make_double4(__tg_fmin(x.lo, y.lo), __tg_fmin(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_fmin(simd_double8 x, simd_double8 y) { +#if defined __x86_64__ && defined __AVX512DQ__ + return _mm512_range_pd(x, y, 4); +#elif defined __x86_64__ && defined __AVX512F__ && __FINITE_MATH_ONLY__ + return _mm512_min_pd(x, y); +#elif defined __x86_64__ && defined __AVX512F__ + return simd_bitselect(_mm512_min_pd(x, y), x, y != y); +#else + return simd_make_double8(__tg_fmin(x.lo, y.lo), __tg_fmin(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_float2 __tg_fmax(simd_float2 x, simd_float2 y) { +#if defined __SSE2__ + return simd_make_float2(__tg_fmax(simd_make_float4_undef(x), simd_make_float4_undef(y))); +#elif defined __arm64__ + return vmaxnm_f32(x, y); +#elif defined __arm__ && __FINITE_MATH_ONLY__ + return vmax_f32(x, y); +#else + return simd_bitselect(y, x, (x >= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_fmax(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_fmax(simd_make_float4_undef(x), simd_make_float4_undef(y))); +} + +static SIMD_CFUNC simd_float4 __tg_fmax(simd_float4 x, simd_float4 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ && !__FINITE_MATH_ONLY__ + return _mm_range_ps(x, y, 5); +#elif defined __SSE2__ && __FINITE_MATH_ONLY__ + return _mm_max_ps(x, y); +#elif defined __SSE2__ + return simd_bitselect(_mm_max_ps(x, y), x, y != y); +#elif defined __arm64__ + return vmaxnmq_f32(x, y); +#elif defined __arm__ && __FINITE_MATH_ONLY__ + return vmaxq_f32(x, y); +#else + return simd_bitselect(y, x, (x >= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_fmax(simd_float8 x, simd_float8 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ && !__FINITE_MATH_ONLY__ + return _mm256_range_ps(x, y, 5); +#elif defined __AVX__ && __FINITE_MATH_ONLY__ + return _mm256_max_ps(x, y); +#elif defined __AVX__ + return simd_bitselect(_mm256_max_ps(x, y), x, y != y); +#else + return simd_make_float8(__tg_fmax(x.lo, y.lo), __tg_fmax(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_fmax(simd_float16 x, simd_float16 y) { +#if defined __x86_64__ && defined __AVX512DQ__ && !__FINITE_MATH_ONLY__ + return _mm512_range_ps(x, y, 5); +#elif defined __x86_64__ && defined __AVX512F__ && __FINITE_MATH_ONLY__ + return _mm512_max_ps(x, y); +#elif defined __x86_64__ && defined __AVX512F__ + return simd_bitselect(_mm512_max_ps(x, y), x, y != y); +#else + return simd_make_float16(__tg_fmax(x.lo, y.lo), __tg_fmax(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_double2 __tg_fmax(simd_double2 x, simd_double2 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ + return _mm_range_pd(x, y, 5); +#elif defined __SSE2__ && __FINITE_MATH_ONLY__ + return _mm_max_pd(x, y); +#elif defined __SSE2__ + return simd_bitselect(_mm_max_pd(x, y), x, y != y); +#elif defined __arm64__ + return vmaxnmq_f64(x, y); +#else + return 
simd_bitselect(y, x, (x >= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_fmax(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_fmax(simd_make_double4_undef(x), simd_make_double4_undef(y))); +} + +static SIMD_CFUNC simd_double4 __tg_fmax(simd_double4 x, simd_double4 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ + return _mm256_range_pd(x, y, 5); +#elif defined __AVX__ && __FINITE_MATH_ONLY__ + return _mm256_max_pd(x, y); +#elif defined __AVX__ + return simd_bitselect(_mm256_max_pd(x, y), x, y != y); +#else + return simd_make_double4(__tg_fmax(x.lo, y.lo), __tg_fmax(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_fmax(simd_double8 x, simd_double8 y) { +#if defined __x86_64__ && defined __AVX512DQ__ + return _mm512_range_pd(x, y, 5); +#elif defined __x86_64__ && defined __AVX512F__ && __FINITE_MATH_ONLY__ + return _mm512_max_pd(x, y); +#elif defined __x86_64__ && defined __AVX512F__ + return simd_bitselect(_mm512_max_pd(x, y), x, y != y); +#else + return simd_make_double8(__tg_fmax(x.lo, y.lo), __tg_fmax(x.hi, y.hi)); +#endif +} + +#pragma mark - copysign implementation +static inline SIMD_CFUNC simd_float2 __tg_copysign(simd_float2 x, simd_float2 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float3 __tg_copysign(simd_float3 x, simd_float3 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float4 __tg_copysign(simd_float4 x, simd_float4 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float8 __tg_copysign(simd_float8 x, simd_float8 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float16 __tg_copysign(simd_float16 x, simd_float16 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_double2 __tg_copysign(simd_double2 x, simd_double2 y) { return simd_bitselect(y, x, 0x7fffffffffffffffL); } +static inline SIMD_CFUNC simd_double3 __tg_copysign(simd_double3 x, simd_double3 y) { return simd_bitselect(y, x, 0x7fffffffffffffffL); } +static inline SIMD_CFUNC simd_double4 __tg_copysign(simd_double4 x, simd_double4 y) { return simd_bitselect(y, x, 0x7fffffffffffffffL); } +static inline SIMD_CFUNC simd_double8 __tg_copysign(simd_double8 x, simd_double8 y) { return simd_bitselect(y, x, 0x7fffffffffffffffL); } + +#pragma mark - sqrt implementation +static SIMD_CFUNC simd_float2 __tg_sqrt(simd_float2 x) { +#if defined __SSE2__ + return simd_make_float2(__tg_sqrt(simd_make_float4_undef(x))); +#elif defined __arm64__ + return vsqrt_f32(x); +#else + return simd_make_float2(sqrt(x.x), sqrt(x.y)); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_sqrt(simd_float3 x) { + return simd_make_float3(__tg_sqrt(simd_make_float4_undef(x))); +} + +static SIMD_CFUNC simd_float4 __tg_sqrt(simd_float4 x) { +#if defined __SSE2__ + return _mm_sqrt_ps(x); +#elif defined __arm64__ + return vsqrtq_f32(x); +#else + return simd_make_float4(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_sqrt(simd_float8 x) { +#if defined __AVX__ + return _mm256_sqrt_ps(x); +#else + return simd_make_float8(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_sqrt(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_sqrt_ps(x); +#else + return simd_make_float16(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double2 __tg_sqrt(simd_double2 x) { +#if defined __SSE2__ + return _mm_sqrt_pd(x); +#elif defined __arm64__ + return 
vsqrtq_f64(x); +#else + return simd_make_double2(sqrt(x.x), sqrt(x.y)); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_sqrt(simd_double3 x) { + return simd_make_double3(__tg_sqrt(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_sqrt(simd_double4 x) { +#if defined __AVX__ + return _mm256_sqrt_pd(x); +#else + return simd_make_double4(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_sqrt(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_sqrt_pd(x); +#else + return simd_make_double8(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +#pragma mark - ceil, floor, rint, trunc implementation +static SIMD_CFUNC simd_float2 __tg_ceil(simd_float2 x) { +#if defined __arm64__ + return vrndp_f32(x); +#else + return simd_make_float2(__tg_ceil(simd_make_float4_undef(x))); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_ceil(simd_float3 x) { + return simd_make_float3(__tg_ceil(simd_make_float4_undef(x))); +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_ceil_f4(simd_float4 x); +#endif + +static SIMD_CFUNC simd_float4 __tg_ceil(simd_float4 x) { +#if defined __SSE4_1__ + return _mm_round_ps(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndpq_f32(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_ceil_f4(x); +#else + simd_float4 truncated = __tg_trunc(x); + simd_float4 adjust = simd_bitselect((simd_float4)0, 1, truncated < x); + return __tg_copysign(truncated + adjust, x); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_ceil(simd_float8 x) { +#if defined __AVX__ + return _mm256_round_ps(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_float8(__tg_ceil(x.lo), __tg_ceil(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_ceil(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_ps(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_float16(__tg_ceil(x.lo), __tg_ceil(x.hi)); +#endif +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_ceil_d2(simd_double2 x); +#endif + +static SIMD_CFUNC simd_double2 __tg_ceil(simd_double2 x) { +#if defined __SSE4_1__ + return _mm_round_pd(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndpq_f64(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_ceil_d2(x); +#else + simd_double2 truncated = __tg_trunc(x); + simd_double2 adjust = simd_bitselect((simd_double2)0, 1, truncated < x); + return __tg_copysign(truncated + adjust, x); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_ceil(simd_double3 x) { + return simd_make_double3(__tg_ceil(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_ceil(simd_double4 x) { +#if defined __AVX__ + return _mm256_round_pd(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_double4(__tg_ceil(x.lo), __tg_ceil(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_ceil(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_pd(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_double8(__tg_ceil(x.lo), __tg_ceil(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float2 __tg_floor(simd_float2 x) { +#if defined __arm64__ + return vrndm_f32(x); +#else + return simd_make_float2(__tg_floor(simd_make_float4_undef(x))); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_floor(simd_float3 x) { + return 
simd_make_float3(__tg_floor(simd_make_float4_undef(x))); +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_floor_f4(simd_float4 x); +#endif + +static SIMD_CFUNC simd_float4 __tg_floor(simd_float4 x) { +#if defined __SSE4_1__ + return _mm_round_ps(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndmq_f32(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_floor_f4(x); +#else + simd_float4 truncated = __tg_trunc(x); + simd_float4 adjust = simd_bitselect((simd_float4)0, 1, truncated > x); + return truncated - adjust; +#endif +} + +static SIMD_CFUNC simd_float8 __tg_floor(simd_float8 x) { +#if defined __AVX__ + return _mm256_round_ps(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_float8(__tg_floor(x.lo), __tg_floor(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_floor(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_ps(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_float16(__tg_floor(x.lo), __tg_floor(x.hi)); +#endif +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_floor_d2(simd_double2 x); +#endif + +static SIMD_CFUNC simd_double2 __tg_floor(simd_double2 x) { +#if defined __SSE4_1__ + return _mm_round_pd(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndmq_f64(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_floor_d2(x); +#else + simd_double2 truncated = __tg_trunc(x); + simd_double2 adjust = simd_bitselect((simd_double2)0, 1, truncated > x); + return truncated - adjust; +#endif +} + +static SIMD_CFUNC simd_double3 __tg_floor(simd_double3 x) { + return simd_make_double3(__tg_floor(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_floor(simd_double4 x) { +#if defined __AVX__ + return _mm256_round_pd(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_double4(__tg_floor(x.lo), __tg_floor(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_floor(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_pd(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_double8(__tg_floor(x.lo), __tg_floor(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float2 __tg_rint(simd_float2 x) { +#if defined __arm64__ + return vrndx_f32(x); +#else + return simd_make_float2(__tg_rint(simd_make_float4_undef(x))); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_rint(simd_float3 x) { + return simd_make_float3(__tg_rint(simd_make_float4_undef(x))); +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_rint_f4(simd_float4 x); +#endif + +static SIMD_CFUNC simd_float4 __tg_rint(simd_float4 x) { +#if defined __SSE4_1__ + return _mm_round_ps(x, _MM_FROUND_RINT); +#elif defined __arm64__ + return vrndxq_f32(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_rint_f4(x); +#else + simd_float4 magic = __tg_copysign(0x1.0p23, x); + simd_int4 x_is_small = __tg_fabs(x) < 0x1.0p23; + return simd_bitselect(x, (x + magic) - magic, x_is_small & 0x7fffffff); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_rint(simd_float8 x) { +#if defined __AVX__ + return _mm256_round_ps(x, _MM_FROUND_RINT); +#else + return simd_make_float8(__tg_rint(x.lo), __tg_rint(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_rint(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_ps(x, 
_MM_FROUND_RINT); +#else + return simd_make_float16(__tg_rint(x.lo), __tg_rint(x.hi)); +#endif +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_rint_d2(simd_double2 x); +#endif + +static SIMD_CFUNC simd_double2 __tg_rint(simd_double2 x) { +#if defined __SSE4_1__ + return _mm_round_pd(x, _MM_FROUND_RINT); +#elif defined __arm64__ + return vrndxq_f64(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_rint_d2(x); +#else + simd_double2 magic = __tg_copysign(0x1.0p52, x); + simd_long2 x_is_small = __tg_fabs(x) < 0x1.0p52; + return simd_bitselect(x, (x + magic) - magic, x_is_small & 0x7fffffffffffffff); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_rint(simd_double3 x) { + return simd_make_double3(__tg_rint(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_rint(simd_double4 x) { +#if defined __AVX__ + return _mm256_round_pd(x, _MM_FROUND_RINT); +#else + return simd_make_double4(__tg_rint(x.lo), __tg_rint(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_rint(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_pd(x, _MM_FROUND_RINT); +#else + return simd_make_double8(__tg_rint(x.lo), __tg_rint(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float2 __tg_trunc(simd_float2 x) { +#if defined __arm64__ + return vrnd_f32(x); +#else + return simd_make_float2(__tg_trunc(simd_make_float4_undef(x))); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_trunc(simd_float3 x) { + return simd_make_float3(__tg_trunc(simd_make_float4_undef(x))); +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_trunc_f4(simd_float4 x); +#endif + +static SIMD_CFUNC simd_float4 __tg_trunc(simd_float4 x) { +#if defined __SSE4_1__ + return _mm_round_ps(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndq_f32(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_trunc_f4(x); +#else + simd_float4 binade = simd_bitselect(0, x, 0x7f800000); + simd_int4 mask = (simd_int4)__tg_fmin(-2*binade + 1, -0); + simd_float4 result = simd_bitselect(0, x, mask); + return simd_bitselect(x, result, binade < 0x1.0p23); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_trunc(simd_float8 x) { +#if defined __AVX__ + return _mm256_round_ps(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + return simd_make_float8(__tg_trunc(x.lo), __tg_trunc(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_trunc(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_ps(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + return simd_make_float16(__tg_trunc(x.lo), __tg_trunc(x.hi)); +#endif +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_trunc_d2(simd_double2 x); +#endif + +static SIMD_CFUNC simd_double2 __tg_trunc(simd_double2 x) { +#if defined __SSE4_1__ + return _mm_round_pd(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndq_f64(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_trunc_d2(x); +#else + simd_double2 binade = simd_bitselect(0, x, 0x7ff0000000000000); + simd_long2 mask = (simd_long2)__tg_fmin(-2*binade + 1, -0); + simd_double2 result = simd_bitselect(0, x, mask); + return simd_bitselect(x, result, binade < 0x1.0p52); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_trunc(simd_double3 x) { + return simd_make_double3(__tg_trunc(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_trunc(simd_double4 x) { +#if 
defined __AVX__ + return _mm256_round_pd(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + return simd_make_double4(__tg_trunc(x.lo), __tg_trunc(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_trunc(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_pd(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + return simd_make_double8(__tg_trunc(x.lo), __tg_trunc(x.hi)); +#endif +} + +#pragma mark - sine, cosine implementation +static inline SIMD_CFUNC simd_float2 __tg_sin(simd_float2 x) { + return simd_make_float2(__tg_sin(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_sin(simd_float3 x) { + return simd_make_float3(__tg_sin(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_sin_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_sin(simd_float4 x) { + return _simd_sin_f4(x); +} +#elif SIMD_LIBRARY_VERSION == 1 +extern simd_float4 __sin_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_sin(simd_float4 x) { + return __sin_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_sin(simd_float4 x) { + return simd_make_float4(sin(x.x), sin(x.y), sin(x.z), sin(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_sin_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_sin(simd_float8 x) { + return _simd_sin_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_sin(simd_float8 x) { + return simd_make_float8(__tg_sin(x.lo), __tg_sin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_sin_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_sin(simd_float16 x) { + return _simd_sin_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_sin(simd_float16 x) { + return simd_make_float16(__tg_sin(x.lo), __tg_sin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_sin_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_sin(simd_double2 x) { + return _simd_sin_d2(x); +} +#elif SIMD_LIBRARY_VERSION == 1 +extern simd_double2 __sin_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_sin(simd_double2 x) { + return __sin_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_sin(simd_double2 x) { + return simd_make_double2(sin(x.x), sin(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_sin(simd_double3 x) { + return simd_make_double3(__tg_sin(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_sin_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_sin(simd_double4 x) { + return _simd_sin_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_sin(simd_double4 x) { + return simd_make_double4(__tg_sin(x.lo), __tg_sin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_sin_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_sin(simd_double8 x) { + return _simd_sin_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_sin(simd_double8 x) { + return simd_make_double8(__tg_sin(x.lo), __tg_sin(x.hi)); +} +#endif + +static inline SIMD_CFUNC simd_float2 __tg_cos(simd_float2 x) { + return simd_make_float2(__tg_cos(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_cos(simd_float3 x) { + return simd_make_float3(__tg_cos(simd_make_float4(x))); +} + 
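+/* Dispatch sketch (descriptive only): each width below either calls a vector
+ * kernel exported by the system math library (e.g. _simd_cos_f4, when
+ * SIMD_LIBRARY_VERSION allows), splits into two halves, or falls back to the
+ * scalar routine applied lane by lane. Callers are insulated from all of
+ * this; in C,
+ *
+ *     simd_float4 a = { 0.0f, (float)M_PI, 0.5f, -0.5f };
+ *     simd_float4 c = cos(a);   // { 1, -1 (to rounding), cos(0.5f), cos(0.5f) }
+ *
+ * and simd::cos(a) in C++ compute the cosine of every lane. */
+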
+#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_cos_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cos(simd_float4 x) { + return _simd_cos_f4(x); +} +#elif SIMD_LIBRARY_VERSION == 1 +extern simd_float4 __cos_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cos(simd_float4 x) { + return __cos_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_cos(simd_float4 x) { + return simd_make_float4(cos(x.x), cos(x.y), cos(x.z), cos(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_cos_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_cos(simd_float8 x) { + return _simd_cos_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_cos(simd_float8 x) { + return simd_make_float8(__tg_cos(x.lo), __tg_cos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_cos_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_cos(simd_float16 x) { + return _simd_cos_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_cos(simd_float16 x) { + return simd_make_float16(__tg_cos(x.lo), __tg_cos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_cos_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cos(simd_double2 x) { + return _simd_cos_d2(x); +} +#elif SIMD_LIBRARY_VERSION == 1 +extern simd_double2 __cos_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cos(simd_double2 x) { + return __cos_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_cos(simd_double2 x) { + return simd_make_double2(cos(x.x), cos(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_cos(simd_double3 x) { + return simd_make_double3(__tg_cos(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_cos_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_cos(simd_double4 x) { + return _simd_cos_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_cos(simd_double4 x) { + return simd_make_double4(__tg_cos(x.lo), __tg_cos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_cos_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_cos(simd_double8 x) { + return _simd_cos_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_cos(simd_double8 x) { + return simd_make_double8(__tg_cos(x.lo), __tg_cos(x.hi)); +} +#endif + + +#pragma mark - acos implementation +static inline SIMD_CFUNC simd_float2 __tg_acos(simd_float2 x) { + return simd_make_float2(__tg_acos(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_acos(simd_float3 x) { + return simd_make_float3(__tg_acos(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_acos_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_acos(simd_float4 x) { + return _simd_acos_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_acos(simd_float4 x) { + return simd_make_float4(acos(x.x), acos(x.y), acos(x.z), acos(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_acos_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_acos(simd_float8 x) { + return _simd_acos_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_acos(simd_float8 x) { + return simd_make_float8(__tg_acos(x.lo), __tg_acos(x.hi)); +} +#endif 
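/*
 * Wider vectors step the same ladder upward: on x86_64, the eight- and
 * sixteen-lane variants call AVX2 and AVX-512 kernels when those are
 * available; otherwise the vector is split into its .lo and .hi halves and
 * each half goes through the narrower implementation. As an illustration
 * (not a declaration from this header), when no AVX2 kernel is present,
 *
 *     __tg_acos(x8) == simd_make_float8(__tg_acos(x8.lo), __tg_acos(x8.hi))
 *
 * for any simd_float8 x8, so the four-lane kernel ultimately does the work.
 */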
+ +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_acos_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_acos(simd_float16 x) { + return _simd_acos_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_acos(simd_float16 x) { + return simd_make_float16(__tg_acos(x.lo), __tg_acos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_acos_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_acos(simd_double2 x) { + return _simd_acos_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_acos(simd_double2 x) { + return simd_make_double2(acos(x.x), acos(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_acos(simd_double3 x) { + return simd_make_double3(__tg_acos(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_acos_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_acos(simd_double4 x) { + return _simd_acos_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_acos(simd_double4 x) { + return simd_make_double4(__tg_acos(x.lo), __tg_acos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_acos_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_acos(simd_double8 x) { + return _simd_acos_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_acos(simd_double8 x) { + return simd_make_double8(__tg_acos(x.lo), __tg_acos(x.hi)); +} +#endif + +#pragma mark - asin implementation +static inline SIMD_CFUNC simd_float2 __tg_asin(simd_float2 x) { + return simd_make_float2(__tg_asin(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_asin(simd_float3 x) { + return simd_make_float3(__tg_asin(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_asin_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_asin(simd_float4 x) { + return _simd_asin_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_asin(simd_float4 x) { + return simd_make_float4(asin(x.x), asin(x.y), asin(x.z), asin(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_asin_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_asin(simd_float8 x) { + return _simd_asin_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_asin(simd_float8 x) { + return simd_make_float8(__tg_asin(x.lo), __tg_asin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_asin_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_asin(simd_float16 x) { + return _simd_asin_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_asin(simd_float16 x) { + return simd_make_float16(__tg_asin(x.lo), __tg_asin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_asin_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_asin(simd_double2 x) { + return _simd_asin_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_asin(simd_double2 x) { + return simd_make_double2(asin(x.x), asin(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_asin(simd_double3 x) { + return simd_make_double3(__tg_asin(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_asin_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 
__tg_asin(simd_double4 x) { + return _simd_asin_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_asin(simd_double4 x) { + return simd_make_double4(__tg_asin(x.lo), __tg_asin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_asin_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_asin(simd_double8 x) { + return _simd_asin_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_asin(simd_double8 x) { + return simd_make_double8(__tg_asin(x.lo), __tg_asin(x.hi)); +} +#endif + +#pragma mark - atan implementation +static inline SIMD_CFUNC simd_float2 __tg_atan(simd_float2 x) { + return simd_make_float2(__tg_atan(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_atan(simd_float3 x) { + return simd_make_float3(__tg_atan(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_atan_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_atan(simd_float4 x) { + return _simd_atan_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_atan(simd_float4 x) { + return simd_make_float4(atan(x.x), atan(x.y), atan(x.z), atan(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_atan_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_atan(simd_float8 x) { + return _simd_atan_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_atan(simd_float8 x) { + return simd_make_float8(__tg_atan(x.lo), __tg_atan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_atan_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_atan(simd_float16 x) { + return _simd_atan_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_atan(simd_float16 x) { + return simd_make_float16(__tg_atan(x.lo), __tg_atan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_atan_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_atan(simd_double2 x) { + return _simd_atan_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_atan(simd_double2 x) { + return simd_make_double2(atan(x.x), atan(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_atan(simd_double3 x) { + return simd_make_double3(__tg_atan(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_atan_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_atan(simd_double4 x) { + return _simd_atan_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_atan(simd_double4 x) { + return simd_make_double4(__tg_atan(x.lo), __tg_atan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_atan_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_atan(simd_double8 x) { + return _simd_atan_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_atan(simd_double8 x) { + return simd_make_double8(__tg_atan(x.lo), __tg_atan(x.hi)); +} +#endif + +#pragma mark - tan implementation +static inline SIMD_CFUNC simd_float2 __tg_tan(simd_float2 x) { + return simd_make_float2(__tg_tan(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_tan(simd_float3 x) { + return simd_make_float3(__tg_tan(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_tan_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 
__tg_tan(simd_float4 x) { + return _simd_tan_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_tan(simd_float4 x) { + return simd_make_float4(tan(x.x), tan(x.y), tan(x.z), tan(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_tan_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_tan(simd_float8 x) { + return _simd_tan_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_tan(simd_float8 x) { + return simd_make_float8(__tg_tan(x.lo), __tg_tan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_tan_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_tan(simd_float16 x) { + return _simd_tan_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_tan(simd_float16 x) { + return simd_make_float16(__tg_tan(x.lo), __tg_tan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_tan_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_tan(simd_double2 x) { + return _simd_tan_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_tan(simd_double2 x) { + return simd_make_double2(tan(x.x), tan(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_tan(simd_double3 x) { + return simd_make_double3(__tg_tan(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_tan_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_tan(simd_double4 x) { + return _simd_tan_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_tan(simd_double4 x) { + return simd_make_double4(__tg_tan(x.lo), __tg_tan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_tan_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_tan(simd_double8 x) { + return _simd_tan_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_tan(simd_double8 x) { + return simd_make_double8(__tg_tan(x.lo), __tg_tan(x.hi)); +} +#endif + +#pragma mark - cospi implementation +#if SIMD_LIBRARY_VERSION >= 1 +static inline SIMD_CFUNC simd_float2 __tg_cospi(simd_float2 x) { + return simd_make_float2(__tg_cospi(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_cospi(simd_float3 x) { + return simd_make_float3(__tg_cospi(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_cospi_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cospi(simd_float4 x) { + return _simd_cospi_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_cospi(simd_float4 x) { + return simd_make_float4(__cospi(x.x), __cospi(x.y), __cospi(x.z), __cospi(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_cospi_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_cospi(simd_float8 x) { + return _simd_cospi_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_cospi(simd_float8 x) { + return simd_make_float8(__tg_cospi(x.lo), __tg_cospi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_cospi_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_cospi(simd_float16 x) { + return _simd_cospi_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_cospi(simd_float16 x) { + return simd_make_float16(__tg_cospi(x.lo), __tg_cospi(x.hi)); +} +#endif + +#if 
SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_cospi_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cospi(simd_double2 x) { + return _simd_cospi_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_cospi(simd_double2 x) { + return simd_make_double2(__cospi(x.x), __cospi(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_cospi(simd_double3 x) { + return simd_make_double3(__tg_cospi(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_cospi_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_cospi(simd_double4 x) { + return _simd_cospi_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_cospi(simd_double4 x) { + return simd_make_double4(__tg_cospi(x.lo), __tg_cospi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_cospi_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_cospi(simd_double8 x) { + return _simd_cospi_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_cospi(simd_double8 x) { + return simd_make_double8(__tg_cospi(x.lo), __tg_cospi(x.hi)); +} +#endif + +#endif /* SIMD_LIBRARY_VERSION */ +#pragma mark - sinpi implementation +#if SIMD_LIBRARY_VERSION >= 1 +static inline SIMD_CFUNC simd_float2 __tg_sinpi(simd_float2 x) { + return simd_make_float2(__tg_sinpi(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_sinpi(simd_float3 x) { + return simd_make_float3(__tg_sinpi(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_sinpi_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_sinpi(simd_float4 x) { + return _simd_sinpi_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_sinpi(simd_float4 x) { + return simd_make_float4(__sinpi(x.x), __sinpi(x.y), __sinpi(x.z), __sinpi(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_sinpi_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_sinpi(simd_float8 x) { + return _simd_sinpi_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_sinpi(simd_float8 x) { + return simd_make_float8(__tg_sinpi(x.lo), __tg_sinpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_sinpi_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_sinpi(simd_float16 x) { + return _simd_sinpi_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_sinpi(simd_float16 x) { + return simd_make_float16(__tg_sinpi(x.lo), __tg_sinpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_sinpi_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_sinpi(simd_double2 x) { + return _simd_sinpi_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_sinpi(simd_double2 x) { + return simd_make_double2(__sinpi(x.x), __sinpi(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_sinpi(simd_double3 x) { + return simd_make_double3(__tg_sinpi(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_sinpi_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_sinpi(simd_double4 x) { + return _simd_sinpi_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_sinpi(simd_double4 x) { + return simd_make_double4(__tg_sinpi(x.lo), __tg_sinpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && 
defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_sinpi_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_sinpi(simd_double8 x) { + return _simd_sinpi_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_sinpi(simd_double8 x) { + return simd_make_double8(__tg_sinpi(x.lo), __tg_sinpi(x.hi)); +} +#endif + +#endif /* SIMD_LIBRARY_VERSION */ +#pragma mark - tanpi implementation +#if SIMD_LIBRARY_VERSION >= 1 +static inline SIMD_CFUNC simd_float2 __tg_tanpi(simd_float2 x) { + return simd_make_float2(__tg_tanpi(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_tanpi(simd_float3 x) { + return simd_make_float3(__tg_tanpi(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_tanpi_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_tanpi(simd_float4 x) { + return _simd_tanpi_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_tanpi(simd_float4 x) { + return simd_make_float4(__tanpi(x.x), __tanpi(x.y), __tanpi(x.z), __tanpi(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_tanpi_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_tanpi(simd_float8 x) { + return _simd_tanpi_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_tanpi(simd_float8 x) { + return simd_make_float8(__tg_tanpi(x.lo), __tg_tanpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_tanpi_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_tanpi(simd_float16 x) { + return _simd_tanpi_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_tanpi(simd_float16 x) { + return simd_make_float16(__tg_tanpi(x.lo), __tg_tanpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_tanpi_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_tanpi(simd_double2 x) { + return _simd_tanpi_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_tanpi(simd_double2 x) { + return simd_make_double2(__tanpi(x.x), __tanpi(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_tanpi(simd_double3 x) { + return simd_make_double3(__tg_tanpi(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_tanpi_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_tanpi(simd_double4 x) { + return _simd_tanpi_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_tanpi(simd_double4 x) { + return simd_make_double4(__tg_tanpi(x.lo), __tg_tanpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_tanpi_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_tanpi(simd_double8 x) { + return _simd_tanpi_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_tanpi(simd_double8 x) { + return simd_make_double8(__tg_tanpi(x.lo), __tg_tanpi(x.hi)); +} +#endif + +#endif /* SIMD_LIBRARY_VERSION */ +#pragma mark - acosh implementation +static inline SIMD_CFUNC simd_float2 __tg_acosh(simd_float2 x) { + return simd_make_float2(__tg_acosh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_acosh(simd_float3 x) { + return simd_make_float3(__tg_acosh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_acosh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_acosh(simd_float4 x) { + return _simd_acosh_f4(x); +} +#else 
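/*
 * When SIMD_LIBRARY_VERSION < 3 there is no vector acosh kernel, so the
 * definition that follows evaluates each lane with the scalar math-library
 * acosh. The same lane-by-lane fallback shape recurs in every corresponding
 * #else branch of the sections below.
 */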
+static inline SIMD_CFUNC simd_float4 __tg_acosh(simd_float4 x) { + return simd_make_float4(acosh(x.x), acosh(x.y), acosh(x.z), acosh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_acosh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_acosh(simd_float8 x) { + return _simd_acosh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_acosh(simd_float8 x) { + return simd_make_float8(__tg_acosh(x.lo), __tg_acosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_acosh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_acosh(simd_float16 x) { + return _simd_acosh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_acosh(simd_float16 x) { + return simd_make_float16(__tg_acosh(x.lo), __tg_acosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_acosh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_acosh(simd_double2 x) { + return _simd_acosh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_acosh(simd_double2 x) { + return simd_make_double2(acosh(x.x), acosh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_acosh(simd_double3 x) { + return simd_make_double3(__tg_acosh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_acosh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_acosh(simd_double4 x) { + return _simd_acosh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_acosh(simd_double4 x) { + return simd_make_double4(__tg_acosh(x.lo), __tg_acosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_acosh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_acosh(simd_double8 x) { + return _simd_acosh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_acosh(simd_double8 x) { + return simd_make_double8(__tg_acosh(x.lo), __tg_acosh(x.hi)); +} +#endif + +#pragma mark - asinh implementation +static inline SIMD_CFUNC simd_float2 __tg_asinh(simd_float2 x) { + return simd_make_float2(__tg_asinh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_asinh(simd_float3 x) { + return simd_make_float3(__tg_asinh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_asinh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_asinh(simd_float4 x) { + return _simd_asinh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_asinh(simd_float4 x) { + return simd_make_float4(asinh(x.x), asinh(x.y), asinh(x.z), asinh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_asinh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_asinh(simd_float8 x) { + return _simd_asinh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_asinh(simd_float8 x) { + return simd_make_float8(__tg_asinh(x.lo), __tg_asinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_asinh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_asinh(simd_float16 x) { + return _simd_asinh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_asinh(simd_float16 x) { + return simd_make_float16(__tg_asinh(x.lo), __tg_asinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern 
simd_double2 _simd_asinh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_asinh(simd_double2 x) { + return _simd_asinh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_asinh(simd_double2 x) { + return simd_make_double2(asinh(x.x), asinh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_asinh(simd_double3 x) { + return simd_make_double3(__tg_asinh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_asinh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_asinh(simd_double4 x) { + return _simd_asinh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_asinh(simd_double4 x) { + return simd_make_double4(__tg_asinh(x.lo), __tg_asinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_asinh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_asinh(simd_double8 x) { + return _simd_asinh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_asinh(simd_double8 x) { + return simd_make_double8(__tg_asinh(x.lo), __tg_asinh(x.hi)); +} +#endif + +#pragma mark - atanh implementation +static inline SIMD_CFUNC simd_float2 __tg_atanh(simd_float2 x) { + return simd_make_float2(__tg_atanh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_atanh(simd_float3 x) { + return simd_make_float3(__tg_atanh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_atanh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_atanh(simd_float4 x) { + return _simd_atanh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_atanh(simd_float4 x) { + return simd_make_float4(atanh(x.x), atanh(x.y), atanh(x.z), atanh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_atanh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_atanh(simd_float8 x) { + return _simd_atanh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_atanh(simd_float8 x) { + return simd_make_float8(__tg_atanh(x.lo), __tg_atanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_atanh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_atanh(simd_float16 x) { + return _simd_atanh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_atanh(simd_float16 x) { + return simd_make_float16(__tg_atanh(x.lo), __tg_atanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_atanh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_atanh(simd_double2 x) { + return _simd_atanh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_atanh(simd_double2 x) { + return simd_make_double2(atanh(x.x), atanh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_atanh(simd_double3 x) { + return simd_make_double3(__tg_atanh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_atanh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_atanh(simd_double4 x) { + return _simd_atanh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_atanh(simd_double4 x) { + return simd_make_double4(__tg_atanh(x.lo), __tg_atanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_atanh_d8(simd_double8 x); +static inline 
SIMD_CFUNC simd_double8 __tg_atanh(simd_double8 x) { + return _simd_atanh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_atanh(simd_double8 x) { + return simd_make_double8(__tg_atanh(x.lo), __tg_atanh(x.hi)); +} +#endif + +#pragma mark - cosh implementation +static inline SIMD_CFUNC simd_float2 __tg_cosh(simd_float2 x) { + return simd_make_float2(__tg_cosh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_cosh(simd_float3 x) { + return simd_make_float3(__tg_cosh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_cosh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cosh(simd_float4 x) { + return _simd_cosh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_cosh(simd_float4 x) { + return simd_make_float4(cosh(x.x), cosh(x.y), cosh(x.z), cosh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_cosh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_cosh(simd_float8 x) { + return _simd_cosh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_cosh(simd_float8 x) { + return simd_make_float8(__tg_cosh(x.lo), __tg_cosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_cosh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_cosh(simd_float16 x) { + return _simd_cosh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_cosh(simd_float16 x) { + return simd_make_float16(__tg_cosh(x.lo), __tg_cosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_cosh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cosh(simd_double2 x) { + return _simd_cosh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_cosh(simd_double2 x) { + return simd_make_double2(cosh(x.x), cosh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_cosh(simd_double3 x) { + return simd_make_double3(__tg_cosh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_cosh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_cosh(simd_double4 x) { + return _simd_cosh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_cosh(simd_double4 x) { + return simd_make_double4(__tg_cosh(x.lo), __tg_cosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_cosh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_cosh(simd_double8 x) { + return _simd_cosh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_cosh(simd_double8 x) { + return simd_make_double8(__tg_cosh(x.lo), __tg_cosh(x.hi)); +} +#endif + +#pragma mark - sinh implementation +static inline SIMD_CFUNC simd_float2 __tg_sinh(simd_float2 x) { + return simd_make_float2(__tg_sinh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_sinh(simd_float3 x) { + return simd_make_float3(__tg_sinh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_sinh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_sinh(simd_float4 x) { + return _simd_sinh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_sinh(simd_float4 x) { + return simd_make_float4(sinh(x.x), sinh(x.y), sinh(x.z), sinh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_sinh_f8(simd_float8 x); +static 
inline SIMD_CFUNC simd_float8 __tg_sinh(simd_float8 x) { + return _simd_sinh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_sinh(simd_float8 x) { + return simd_make_float8(__tg_sinh(x.lo), __tg_sinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_sinh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_sinh(simd_float16 x) { + return _simd_sinh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_sinh(simd_float16 x) { + return simd_make_float16(__tg_sinh(x.lo), __tg_sinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_sinh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_sinh(simd_double2 x) { + return _simd_sinh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_sinh(simd_double2 x) { + return simd_make_double2(sinh(x.x), sinh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_sinh(simd_double3 x) { + return simd_make_double3(__tg_sinh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_sinh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_sinh(simd_double4 x) { + return _simd_sinh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_sinh(simd_double4 x) { + return simd_make_double4(__tg_sinh(x.lo), __tg_sinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_sinh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_sinh(simd_double8 x) { + return _simd_sinh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_sinh(simd_double8 x) { + return simd_make_double8(__tg_sinh(x.lo), __tg_sinh(x.hi)); +} +#endif + +#pragma mark - tanh implementation +static inline SIMD_CFUNC simd_float2 __tg_tanh(simd_float2 x) { + return simd_make_float2(__tg_tanh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_tanh(simd_float3 x) { + return simd_make_float3(__tg_tanh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_tanh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_tanh(simd_float4 x) { + return _simd_tanh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_tanh(simd_float4 x) { + return simd_make_float4(tanh(x.x), tanh(x.y), tanh(x.z), tanh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_tanh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_tanh(simd_float8 x) { + return _simd_tanh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_tanh(simd_float8 x) { + return simd_make_float8(__tg_tanh(x.lo), __tg_tanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_tanh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_tanh(simd_float16 x) { + return _simd_tanh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_tanh(simd_float16 x) { + return simd_make_float16(__tg_tanh(x.lo), __tg_tanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_tanh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_tanh(simd_double2 x) { + return _simd_tanh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_tanh(simd_double2 x) { + return simd_make_double2(tanh(x.x), tanh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_tanh(simd_double3 x) { 
+ return simd_make_double3(__tg_tanh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_tanh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_tanh(simd_double4 x) { + return _simd_tanh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_tanh(simd_double4 x) { + return simd_make_double4(__tg_tanh(x.lo), __tg_tanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_tanh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_tanh(simd_double8 x) { + return _simd_tanh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_tanh(simd_double8 x) { + return simd_make_double8(__tg_tanh(x.lo), __tg_tanh(x.hi)); +} +#endif + +#pragma mark - exp implementation +static inline SIMD_CFUNC simd_float2 __tg_exp(simd_float2 x) { + return simd_make_float2(__tg_exp(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_exp(simd_float3 x) { + return simd_make_float3(__tg_exp(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_exp_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_exp(simd_float4 x) { + return _simd_exp_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_exp(simd_float4 x) { + return simd_make_float4(exp(x.x), exp(x.y), exp(x.z), exp(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_exp_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_exp(simd_float8 x) { + return _simd_exp_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_exp(simd_float8 x) { + return simd_make_float8(__tg_exp(x.lo), __tg_exp(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_exp_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_exp(simd_float16 x) { + return _simd_exp_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_exp(simd_float16 x) { + return simd_make_float16(__tg_exp(x.lo), __tg_exp(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_exp_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_exp(simd_double2 x) { + return _simd_exp_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_exp(simd_double2 x) { + return simd_make_double2(exp(x.x), exp(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_exp(simd_double3 x) { + return simd_make_double3(__tg_exp(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_exp_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_exp(simd_double4 x) { + return _simd_exp_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_exp(simd_double4 x) { + return simd_make_double4(__tg_exp(x.lo), __tg_exp(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_exp_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_exp(simd_double8 x) { + return _simd_exp_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_exp(simd_double8 x) { + return simd_make_double8(__tg_exp(x.lo), __tg_exp(x.hi)); +} +#endif + +#pragma mark - exp2 implementation +static inline SIMD_CFUNC simd_float2 __tg_exp2(simd_float2 x) { + return simd_make_float2(__tg_exp2(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_exp2(simd_float3 x) 
{ + return simd_make_float3(__tg_exp2(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_exp2_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_exp2(simd_float4 x) { + return _simd_exp2_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_exp2(simd_float4 x) { + return simd_make_float4(exp2(x.x), exp2(x.y), exp2(x.z), exp2(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_exp2_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_exp2(simd_float8 x) { + return _simd_exp2_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_exp2(simd_float8 x) { + return simd_make_float8(__tg_exp2(x.lo), __tg_exp2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_exp2_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_exp2(simd_float16 x) { + return _simd_exp2_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_exp2(simd_float16 x) { + return simd_make_float16(__tg_exp2(x.lo), __tg_exp2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_exp2_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_exp2(simd_double2 x) { + return _simd_exp2_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_exp2(simd_double2 x) { + return simd_make_double2(exp2(x.x), exp2(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_exp2(simd_double3 x) { + return simd_make_double3(__tg_exp2(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_exp2_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_exp2(simd_double4 x) { + return _simd_exp2_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_exp2(simd_double4 x) { + return simd_make_double4(__tg_exp2(x.lo), __tg_exp2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_exp2_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_exp2(simd_double8 x) { + return _simd_exp2_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_exp2(simd_double8 x) { + return simd_make_double8(__tg_exp2(x.lo), __tg_exp2(x.hi)); +} +#endif + +#pragma mark - exp10 implementation +#if SIMD_LIBRARY_VERSION >= 1 +static inline SIMD_CFUNC simd_float2 __tg_exp10(simd_float2 x) { + return simd_make_float2(__tg_exp10(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_exp10(simd_float3 x) { + return simd_make_float3(__tg_exp10(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_exp10_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_exp10(simd_float4 x) { + return _simd_exp10_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_exp10(simd_float4 x) { + return simd_make_float4(__exp10(x.x), __exp10(x.y), __exp10(x.z), __exp10(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_exp10_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_exp10(simd_float8 x) { + return _simd_exp10_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_exp10(simd_float8 x) { + return simd_make_float8(__tg_exp10(x.lo), __tg_exp10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_exp10_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 
__tg_exp10(simd_float16 x) { + return _simd_exp10_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_exp10(simd_float16 x) { + return simd_make_float16(__tg_exp10(x.lo), __tg_exp10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_exp10_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_exp10(simd_double2 x) { + return _simd_exp10_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_exp10(simd_double2 x) { + return simd_make_double2(__exp10(x.x), __exp10(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_exp10(simd_double3 x) { + return simd_make_double3(__tg_exp10(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_exp10_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_exp10(simd_double4 x) { + return _simd_exp10_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_exp10(simd_double4 x) { + return simd_make_double4(__tg_exp10(x.lo), __tg_exp10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_exp10_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_exp10(simd_double8 x) { + return _simd_exp10_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_exp10(simd_double8 x) { + return simd_make_double8(__tg_exp10(x.lo), __tg_exp10(x.hi)); +} +#endif + +#endif /* SIMD_LIBRARY_VERSION */ +#pragma mark - expm1 implementation +static inline SIMD_CFUNC simd_float2 __tg_expm1(simd_float2 x) { + return simd_make_float2(__tg_expm1(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_expm1(simd_float3 x) { + return simd_make_float3(__tg_expm1(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_expm1_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_expm1(simd_float4 x) { + return _simd_expm1_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_expm1(simd_float4 x) { + return simd_make_float4(expm1(x.x), expm1(x.y), expm1(x.z), expm1(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_expm1_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_expm1(simd_float8 x) { + return _simd_expm1_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_expm1(simd_float8 x) { + return simd_make_float8(__tg_expm1(x.lo), __tg_expm1(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_expm1_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_expm1(simd_float16 x) { + return _simd_expm1_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_expm1(simd_float16 x) { + return simd_make_float16(__tg_expm1(x.lo), __tg_expm1(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_expm1_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_expm1(simd_double2 x) { + return _simd_expm1_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_expm1(simd_double2 x) { + return simd_make_double2(expm1(x.x), expm1(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_expm1(simd_double3 x) { + return simd_make_double3(__tg_expm1(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_expm1_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_expm1(simd_double4 x) { + return _simd_expm1_d4(x); +} +#else 
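/*
 * Without an AVX2 kernel, the definition that follows splits the four double
 * lanes into .lo and .hi halves and defers to the two-lane expm1
 * implementation above.
 */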
+static inline SIMD_CFUNC simd_double4 __tg_expm1(simd_double4 x) { + return simd_make_double4(__tg_expm1(x.lo), __tg_expm1(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_expm1_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_expm1(simd_double8 x) { + return _simd_expm1_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_expm1(simd_double8 x) { + return simd_make_double8(__tg_expm1(x.lo), __tg_expm1(x.hi)); +} +#endif + +#pragma mark - log implementation +static inline SIMD_CFUNC simd_float2 __tg_log(simd_float2 x) { + return simd_make_float2(__tg_log(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_log(simd_float3 x) { + return simd_make_float3(__tg_log(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_log_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_log(simd_float4 x) { + return _simd_log_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_log(simd_float4 x) { + return simd_make_float4(log(x.x), log(x.y), log(x.z), log(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_log_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_log(simd_float8 x) { + return _simd_log_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_log(simd_float8 x) { + return simd_make_float8(__tg_log(x.lo), __tg_log(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_log_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_log(simd_float16 x) { + return _simd_log_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_log(simd_float16 x) { + return simd_make_float16(__tg_log(x.lo), __tg_log(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_log_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_log(simd_double2 x) { + return _simd_log_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_log(simd_double2 x) { + return simd_make_double2(log(x.x), log(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_log(simd_double3 x) { + return simd_make_double3(__tg_log(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_log_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_log(simd_double4 x) { + return _simd_log_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_log(simd_double4 x) { + return simd_make_double4(__tg_log(x.lo), __tg_log(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_log_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_log(simd_double8 x) { + return _simd_log_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_log(simd_double8 x) { + return simd_make_double8(__tg_log(x.lo), __tg_log(x.hi)); +} +#endif + +#pragma mark - log2 implementation +static inline SIMD_CFUNC simd_float2 __tg_log2(simd_float2 x) { + return simd_make_float2(__tg_log2(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_log2(simd_float3 x) { + return simd_make_float3(__tg_log2(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_log2_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_log2(simd_float4 x) { + return _simd_log2_f4(x); +} +#else +static inline SIMD_CFUNC 
simd_float4 __tg_log2(simd_float4 x) { + return simd_make_float4(log2(x.x), log2(x.y), log2(x.z), log2(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_log2_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_log2(simd_float8 x) { + return _simd_log2_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_log2(simd_float8 x) { + return simd_make_float8(__tg_log2(x.lo), __tg_log2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_log2_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_log2(simd_float16 x) { + return _simd_log2_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_log2(simd_float16 x) { + return simd_make_float16(__tg_log2(x.lo), __tg_log2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_log2_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_log2(simd_double2 x) { + return _simd_log2_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_log2(simd_double2 x) { + return simd_make_double2(log2(x.x), log2(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_log2(simd_double3 x) { + return simd_make_double3(__tg_log2(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_log2_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_log2(simd_double4 x) { + return _simd_log2_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_log2(simd_double4 x) { + return simd_make_double4(__tg_log2(x.lo), __tg_log2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_log2_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_log2(simd_double8 x) { + return _simd_log2_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_log2(simd_double8 x) { + return simd_make_double8(__tg_log2(x.lo), __tg_log2(x.hi)); +} +#endif + +#pragma mark - log10 implementation +static inline SIMD_CFUNC simd_float2 __tg_log10(simd_float2 x) { + return simd_make_float2(__tg_log10(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_log10(simd_float3 x) { + return simd_make_float3(__tg_log10(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_log10_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_log10(simd_float4 x) { + return _simd_log10_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_log10(simd_float4 x) { + return simd_make_float4(log10(x.x), log10(x.y), log10(x.z), log10(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_log10_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_log10(simd_float8 x) { + return _simd_log10_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_log10(simd_float8 x) { + return simd_make_float8(__tg_log10(x.lo), __tg_log10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_log10_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_log10(simd_float16 x) { + return _simd_log10_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_log10(simd_float16 x) { + return simd_make_float16(__tg_log10(x.lo), __tg_log10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_log10_d2(simd_double2 x); +static inline 
SIMD_CFUNC simd_double2 __tg_log10(simd_double2 x) { + return _simd_log10_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_log10(simd_double2 x) { + return simd_make_double2(log10(x.x), log10(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_log10(simd_double3 x) { + return simd_make_double3(__tg_log10(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_log10_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_log10(simd_double4 x) { + return _simd_log10_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_log10(simd_double4 x) { + return simd_make_double4(__tg_log10(x.lo), __tg_log10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_log10_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_log10(simd_double8 x) { + return _simd_log10_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_log10(simd_double8 x) { + return simd_make_double8(__tg_log10(x.lo), __tg_log10(x.hi)); +} +#endif + +#pragma mark - log1p implementation +static inline SIMD_CFUNC simd_float2 __tg_log1p(simd_float2 x) { + return simd_make_float2(__tg_log1p(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_log1p(simd_float3 x) { + return simd_make_float3(__tg_log1p(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_log1p_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_log1p(simd_float4 x) { + return _simd_log1p_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_log1p(simd_float4 x) { + return simd_make_float4(log1p(x.x), log1p(x.y), log1p(x.z), log1p(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_log1p_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_log1p(simd_float8 x) { + return _simd_log1p_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_log1p(simd_float8 x) { + return simd_make_float8(__tg_log1p(x.lo), __tg_log1p(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_log1p_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_log1p(simd_float16 x) { + return _simd_log1p_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_log1p(simd_float16 x) { + return simd_make_float16(__tg_log1p(x.lo), __tg_log1p(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_log1p_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_log1p(simd_double2 x) { + return _simd_log1p_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_log1p(simd_double2 x) { + return simd_make_double2(log1p(x.x), log1p(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_log1p(simd_double3 x) { + return simd_make_double3(__tg_log1p(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_log1p_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_log1p(simd_double4 x) { + return _simd_log1p_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_log1p(simd_double4 x) { + return simd_make_double4(__tg_log1p(x.lo), __tg_log1p(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_log1p_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_log1p(simd_double8 x) { + return 
_simd_log1p_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_log1p(simd_double8 x) { + return simd_make_double8(__tg_log1p(x.lo), __tg_log1p(x.hi)); +} +#endif + +#pragma mark - cbrt implementation +static inline SIMD_CFUNC simd_float2 __tg_cbrt(simd_float2 x) { + return simd_make_float2(__tg_cbrt(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_cbrt(simd_float3 x) { + return simd_make_float3(__tg_cbrt(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_cbrt_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cbrt(simd_float4 x) { + return _simd_cbrt_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_cbrt(simd_float4 x) { + return simd_make_float4(cbrt(x.x), cbrt(x.y), cbrt(x.z), cbrt(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_cbrt_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_cbrt(simd_float8 x) { + return _simd_cbrt_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_cbrt(simd_float8 x) { + return simd_make_float8(__tg_cbrt(x.lo), __tg_cbrt(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_cbrt_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_cbrt(simd_float16 x) { + return _simd_cbrt_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_cbrt(simd_float16 x) { + return simd_make_float16(__tg_cbrt(x.lo), __tg_cbrt(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_cbrt_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cbrt(simd_double2 x) { + return _simd_cbrt_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_cbrt(simd_double2 x) { + return simd_make_double2(cbrt(x.x), cbrt(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_cbrt(simd_double3 x) { + return simd_make_double3(__tg_cbrt(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_cbrt_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_cbrt(simd_double4 x) { + return _simd_cbrt_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_cbrt(simd_double4 x) { + return simd_make_double4(__tg_cbrt(x.lo), __tg_cbrt(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_cbrt_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_cbrt(simd_double8 x) { + return _simd_cbrt_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_cbrt(simd_double8 x) { + return simd_make_double8(__tg_cbrt(x.lo), __tg_cbrt(x.hi)); +} +#endif + +#pragma mark - erf implementation +static inline SIMD_CFUNC simd_float2 __tg_erf(simd_float2 x) { + return simd_make_float2(__tg_erf(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_erf(simd_float3 x) { + return simd_make_float3(__tg_erf(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_erf_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_erf(simd_float4 x) { + return _simd_erf_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_erf(simd_float4 x) { + return simd_make_float4(erf(x.x), erf(x.y), erf(x.z), erf(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_erf_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_erf(simd_float8 x) { + return 
_simd_erf_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_erf(simd_float8 x) { + return simd_make_float8(__tg_erf(x.lo), __tg_erf(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_erf_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_erf(simd_float16 x) { + return _simd_erf_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_erf(simd_float16 x) { + return simd_make_float16(__tg_erf(x.lo), __tg_erf(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_erf_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_erf(simd_double2 x) { + return _simd_erf_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_erf(simd_double2 x) { + return simd_make_double2(erf(x.x), erf(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_erf(simd_double3 x) { + return simd_make_double3(__tg_erf(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_erf_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_erf(simd_double4 x) { + return _simd_erf_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_erf(simd_double4 x) { + return simd_make_double4(__tg_erf(x.lo), __tg_erf(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_erf_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_erf(simd_double8 x) { + return _simd_erf_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_erf(simd_double8 x) { + return simd_make_double8(__tg_erf(x.lo), __tg_erf(x.hi)); +} +#endif + +#pragma mark - erfc implementation +static inline SIMD_CFUNC simd_float2 __tg_erfc(simd_float2 x) { + return simd_make_float2(__tg_erfc(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_erfc(simd_float3 x) { + return simd_make_float3(__tg_erfc(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_erfc_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_erfc(simd_float4 x) { + return _simd_erfc_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_erfc(simd_float4 x) { + return simd_make_float4(erfc(x.x), erfc(x.y), erfc(x.z), erfc(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_erfc_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_erfc(simd_float8 x) { + return _simd_erfc_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_erfc(simd_float8 x) { + return simd_make_float8(__tg_erfc(x.lo), __tg_erfc(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_erfc_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_erfc(simd_float16 x) { + return _simd_erfc_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_erfc(simd_float16 x) { + return simd_make_float16(__tg_erfc(x.lo), __tg_erfc(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_erfc_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_erfc(simd_double2 x) { + return _simd_erfc_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_erfc(simd_double2 x) { + return simd_make_double2(erfc(x.x), erfc(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_erfc(simd_double3 x) { + return simd_make_double3(__tg_erfc(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 
&& defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_erfc_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_erfc(simd_double4 x) { + return _simd_erfc_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_erfc(simd_double4 x) { + return simd_make_double4(__tg_erfc(x.lo), __tg_erfc(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_erfc_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_erfc(simd_double8 x) { + return _simd_erfc_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_erfc(simd_double8 x) { + return simd_make_double8(__tg_erfc(x.lo), __tg_erfc(x.hi)); +} +#endif + +#pragma mark - tgamma implementation +static inline SIMD_CFUNC simd_float2 __tg_tgamma(simd_float2 x) { + return simd_make_float2(__tg_tgamma(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_tgamma(simd_float3 x) { + return simd_make_float3(__tg_tgamma(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_tgamma_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_tgamma(simd_float4 x) { + return _simd_tgamma_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_tgamma(simd_float4 x) { + return simd_make_float4(tgamma(x.x), tgamma(x.y), tgamma(x.z), tgamma(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_tgamma_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_tgamma(simd_float8 x) { + return _simd_tgamma_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_tgamma(simd_float8 x) { + return simd_make_float8(__tg_tgamma(x.lo), __tg_tgamma(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_tgamma_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_tgamma(simd_float16 x) { + return _simd_tgamma_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_tgamma(simd_float16 x) { + return simd_make_float16(__tg_tgamma(x.lo), __tg_tgamma(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_tgamma_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_tgamma(simd_double2 x) { + return _simd_tgamma_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_tgamma(simd_double2 x) { + return simd_make_double2(tgamma(x.x), tgamma(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_tgamma(simd_double3 x) { + return simd_make_double3(__tg_tgamma(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_tgamma_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_tgamma(simd_double4 x) { + return _simd_tgamma_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_tgamma(simd_double4 x) { + return simd_make_double4(__tg_tgamma(x.lo), __tg_tgamma(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_tgamma_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_tgamma(simd_double8 x) { + return _simd_tgamma_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_tgamma(simd_double8 x) { + return simd_make_double8(__tg_tgamma(x.lo), __tg_tgamma(x.hi)); +} +#endif + +#pragma mark - round implementation +static inline SIMD_CFUNC simd_float2 __tg_round(simd_float2 x) { + return simd_make_float2(__tg_round(simd_make_float4(x))); +} + +static inline 
SIMD_CFUNC simd_float3 __tg_round(simd_float3 x) { + return simd_make_float3(__tg_round(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_round_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_round(simd_float4 x) { +#if defined __arm64__ + return vrndaq_f32(x); +#else + return _simd_round_f4(x); +#endif +} +#else +static inline SIMD_CFUNC simd_float4 __tg_round(simd_float4 x) { + return simd_make_float4(round(x.x), round(x.y), round(x.z), round(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_round_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_round(simd_float8 x) { + return _simd_round_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_round(simd_float8 x) { + return simd_make_float8(__tg_round(x.lo), __tg_round(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_round_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_round(simd_float16 x) { + return _simd_round_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_round(simd_float16 x) { + return simd_make_float16(__tg_round(x.lo), __tg_round(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_round_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_round(simd_double2 x) { +#if defined __arm64__ + return vrndaq_f64(x); +#else + return _simd_round_d2(x); +#endif +} +#else +static inline SIMD_CFUNC simd_double2 __tg_round(simd_double2 x) { + return simd_make_double2(round(x.x), round(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_round(simd_double3 x) { + return simd_make_double3(__tg_round(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_round_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_round(simd_double4 x) { + return _simd_round_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_round(simd_double4 x) { + return simd_make_double4(__tg_round(x.lo), __tg_round(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_round_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_round(simd_double8 x) { + return _simd_round_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_round(simd_double8 x) { + return simd_make_double8(__tg_round(x.lo), __tg_round(x.hi)); +} +#endif + +#pragma mark - atan2 implementation +static inline SIMD_CFUNC simd_float2 __tg_atan2(simd_float2 y, simd_float2 x) { + return simd_make_float2(__tg_atan2(simd_make_float4(y), simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_atan2(simd_float3 y, simd_float3 x) { + return simd_make_float3(__tg_atan2(simd_make_float4(y), simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_atan2_f4(simd_float4 y, simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_atan2(simd_float4 y, simd_float4 x) { + return _simd_atan2_f4(y, x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_atan2(simd_float4 y, simd_float4 x) { + return simd_make_float4(atan2(y.x, x.x), atan2(y.y, x.y), atan2(y.z, x.z), atan2(y.w, x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_atan2_f8(simd_float8 y, simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_atan2(simd_float8 y, simd_float8 x) { + return 
_simd_atan2_f8(y, x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_atan2(simd_float8 y, simd_float8 x) { + return simd_make_float8(__tg_atan2(y.lo, x.lo), __tg_atan2(y.hi, x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_atan2_f16(simd_float16 y, simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_atan2(simd_float16 y, simd_float16 x) { + return _simd_atan2_f16(y, x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_atan2(simd_float16 y, simd_float16 x) { + return simd_make_float16(__tg_atan2(y.lo, x.lo), __tg_atan2(y.hi, x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_atan2_d2(simd_double2 y, simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_atan2(simd_double2 y, simd_double2 x) { + return _simd_atan2_d2(y, x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_atan2(simd_double2 y, simd_double2 x) { + return simd_make_double2(atan2(y.x, x.x), atan2(y.y, x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_atan2(simd_double3 y, simd_double3 x) { + return simd_make_double3(__tg_atan2(simd_make_double4(y), simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_atan2_d4(simd_double4 y, simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_atan2(simd_double4 y, simd_double4 x) { + return _simd_atan2_d4(y, x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_atan2(simd_double4 y, simd_double4 x) { + return simd_make_double4(__tg_atan2(y.lo, x.lo), __tg_atan2(y.hi, x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_atan2_d8(simd_double8 y, simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_atan2(simd_double8 y, simd_double8 x) { + return _simd_atan2_d8(y, x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_atan2(simd_double8 y, simd_double8 x) { + return simd_make_double8(__tg_atan2(y.lo, x.lo), __tg_atan2(y.hi, x.hi)); +} +#endif + +#pragma mark - hypot implementation +static inline SIMD_CFUNC simd_float2 __tg_hypot(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_hypot(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_hypot(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_hypot(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_hypot_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC simd_float4 __tg_hypot(simd_float4 x, simd_float4 y) { + return _simd_hypot_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_hypot(simd_float4 x, simd_float4 y) { + return simd_make_float4(hypot(x.x, y.x), hypot(x.y, y.y), hypot(x.z, y.z), hypot(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_hypot_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_hypot(simd_float8 x, simd_float8 y) { + return _simd_hypot_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_hypot(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_hypot(x.lo, y.lo), __tg_hypot(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_hypot_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_hypot(simd_float16 x, simd_float16 y) { + return _simd_hypot_f16(x, 
y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_hypot(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_hypot(x.lo, y.lo), __tg_hypot(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_hypot_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_hypot(simd_double2 x, simd_double2 y) { + return _simd_hypot_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_hypot(simd_double2 x, simd_double2 y) { + return simd_make_double2(hypot(x.x, y.x), hypot(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_hypot(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_hypot(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_hypot_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_hypot(simd_double4 x, simd_double4 y) { + return _simd_hypot_d4(x, y); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_hypot(simd_double4 x, simd_double4 y) { + return simd_make_double4(__tg_hypot(x.lo, y.lo), __tg_hypot(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_hypot_d8(simd_double8 x, simd_double8 y); +static inline SIMD_CFUNC simd_double8 __tg_hypot(simd_double8 x, simd_double8 y) { + return _simd_hypot_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_hypot(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_hypot(x.lo, y.lo), __tg_hypot(x.hi, y.hi)); +} +#endif + +#pragma mark - pow implementation +static inline SIMD_CFUNC simd_float2 __tg_pow(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_pow(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_pow(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_pow(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_pow_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC simd_float4 __tg_pow(simd_float4 x, simd_float4 y) { + return _simd_pow_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_pow(simd_float4 x, simd_float4 y) { + return simd_make_float4(pow(x.x, y.x), pow(x.y, y.y), pow(x.z, y.z), pow(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_pow_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_pow(simd_float8 x, simd_float8 y) { + return _simd_pow_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_pow(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_pow(x.lo, y.lo), __tg_pow(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_pow_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_pow(simd_float16 x, simd_float16 y) { + return _simd_pow_f16(x, y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_pow(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_pow(x.lo, y.lo), __tg_pow(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_pow_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_pow(simd_double2 x, simd_double2 y) { + return _simd_pow_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_pow(simd_double2 x, simd_double2 y) { + return 
simd_make_double2(pow(x.x, y.x), pow(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_pow(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_pow(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_pow_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_pow(simd_double4 x, simd_double4 y) { + return _simd_pow_d4(x, y); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_pow(simd_double4 x, simd_double4 y) { + return simd_make_double4(__tg_pow(x.lo, y.lo), __tg_pow(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_pow_d8(simd_double8 x, simd_double8 y); +static inline SIMD_CFUNC simd_double8 __tg_pow(simd_double8 x, simd_double8 y) { + return _simd_pow_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_pow(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_pow(x.lo, y.lo), __tg_pow(x.hi, y.hi)); +} +#endif + +#pragma mark - fmod implementation +static inline SIMD_CFUNC simd_float2 __tg_fmod(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_fmod(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_fmod(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_fmod(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_fmod_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC simd_float4 __tg_fmod(simd_float4 x, simd_float4 y) { + return _simd_fmod_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_fmod(simd_float4 x, simd_float4 y) { + return simd_make_float4(fmod(x.x, y.x), fmod(x.y, y.y), fmod(x.z, y.z), fmod(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_fmod_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_fmod(simd_float8 x, simd_float8 y) { + return _simd_fmod_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_fmod(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_fmod(x.lo, y.lo), __tg_fmod(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_fmod_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_fmod(simd_float16 x, simd_float16 y) { + return _simd_fmod_f16(x, y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_fmod(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_fmod(x.lo, y.lo), __tg_fmod(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_fmod_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_fmod(simd_double2 x, simd_double2 y) { + return _simd_fmod_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_fmod(simd_double2 x, simd_double2 y) { + return simd_make_double2(fmod(x.x, y.x), fmod(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_fmod(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_fmod(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_fmod_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_fmod(simd_double4 x, simd_double4 y) { + return _simd_fmod_d4(x, y); +} +#else +static inline 
SIMD_CFUNC simd_double4 __tg_fmod(simd_double4 x, simd_double4 y) { + return simd_make_double4(__tg_fmod(x.lo, y.lo), __tg_fmod(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_fmod_d8(simd_double8 x, simd_double8 y); +static inline SIMD_CFUNC simd_double8 __tg_fmod(simd_double8 x, simd_double8 y) { + return _simd_fmod_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_fmod(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_fmod(x.lo, y.lo), __tg_fmod(x.hi, y.hi)); +} +#endif + +#pragma mark - remainder implementation +static inline SIMD_CFUNC simd_float2 __tg_remainder(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_remainder(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_remainder(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_remainder(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_remainder_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC simd_float4 __tg_remainder(simd_float4 x, simd_float4 y) { + return _simd_remainder_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_remainder(simd_float4 x, simd_float4 y) { + return simd_make_float4(remainder(x.x, y.x), remainder(x.y, y.y), remainder(x.z, y.z), remainder(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_remainder_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_remainder(simd_float8 x, simd_float8 y) { + return _simd_remainder_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_remainder(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_remainder(x.lo, y.lo), __tg_remainder(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_remainder_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_remainder(simd_float16 x, simd_float16 y) { + return _simd_remainder_f16(x, y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_remainder(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_remainder(x.lo, y.lo), __tg_remainder(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_remainder_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_remainder(simd_double2 x, simd_double2 y) { + return _simd_remainder_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_remainder(simd_double2 x, simd_double2 y) { + return simd_make_double2(remainder(x.x, y.x), remainder(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_remainder(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_remainder(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_remainder_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_remainder(simd_double4 x, simd_double4 y) { + return _simd_remainder_d4(x, y); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_remainder(simd_double4 x, simd_double4 y) { + return simd_make_double4(__tg_remainder(x.lo, y.lo), __tg_remainder(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_remainder_d8(simd_double8 x, simd_double8 y); 
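+/* As throughout this header: when a vectorized kernel is available
+   (SIMD_LIBRARY_VERSION >= 3 plus, for the wider types, the ISA checks
+   above), the wrapper below simply forwards to it; otherwise the #else
+   branch splits the vector into its .lo/.hi halves and recurses, bottoming
+   out in lane-wise calls to the scalar libm function. */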
+static inline SIMD_CFUNC simd_double8 __tg_remainder(simd_double8 x, simd_double8 y) { + return _simd_remainder_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_remainder(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_remainder(x.lo, y.lo), __tg_remainder(x.hi, y.hi)); +} +#endif + +#pragma mark - nextafter implementation +static inline SIMD_CFUNC simd_float2 __tg_nextafter(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_nextafter(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_nextafter(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_nextafter(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_nextafter_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC simd_float4 __tg_nextafter(simd_float4 x, simd_float4 y) { + return _simd_nextafter_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_nextafter(simd_float4 x, simd_float4 y) { + return simd_make_float4(nextafter(x.x, y.x), nextafter(x.y, y.y), nextafter(x.z, y.z), nextafter(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_nextafter_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_nextafter(simd_float8 x, simd_float8 y) { + return _simd_nextafter_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_nextafter(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_nextafter(x.lo, y.lo), __tg_nextafter(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_nextafter_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_nextafter(simd_float16 x, simd_float16 y) { + return _simd_nextafter_f16(x, y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_nextafter(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_nextafter(x.lo, y.lo), __tg_nextafter(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_nextafter_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_nextafter(simd_double2 x, simd_double2 y) { + return _simd_nextafter_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_nextafter(simd_double2 x, simd_double2 y) { + return simd_make_double2(nextafter(x.x, y.x), nextafter(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_nextafter(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_nextafter(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_nextafter_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_nextafter(simd_double4 x, simd_double4 y) { + return _simd_nextafter_d4(x, y); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_nextafter(simd_double4 x, simd_double4 y) { + return simd_make_double4(__tg_nextafter(x.lo, y.lo), __tg_nextafter(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_nextafter_d8(simd_double8 x, simd_double8 y); +static inline SIMD_CFUNC simd_double8 __tg_nextafter(simd_double8 x, simd_double8 y) { + return _simd_nextafter_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_nextafter(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_nextafter(x.lo, y.lo), 
__tg_nextafter(x.hi, y.hi)); +} +#endif + +static inline SIMD_CFUNC simd_float2 __tg_fdim(simd_float2 x, simd_float2 y) { return simd_bitselect(x-y, 0, x<y); } +static inline SIMD_CFUNC simd_float3 __tg_fdim(simd_float3 x, simd_float3 y) { return simd_bitselect(x-y, 0, x<y); } +static inline SIMD_CFUNC simd_float4 __tg_fdim(simd_float4 x, simd_float4 y) { return simd_bitselect(x-y, 0, x<y); } +static inline SIMD_CFUNC simd_float8 __tg_fdim(simd_float8 x, simd_float8 y) { return simd_bitselect(x-y, 0, x<y); } +static inline SIMD_CFUNC simd_float16 __tg_fdim(simd_float16 x, simd_float16 y) { return simd_bitselect(x-y, 0, x<y); } +static inline SIMD_CFUNC simd_double2 __tg_fdim(simd_double2 x, simd_double2 y) { return simd_bitselect(x-y, 0, x<y); } +static inline SIMD_CFUNC simd_double3 __tg_fdim(simd_double3 x, simd_double3 y) { return simd_bitselect(x-y, 0, x<y); } +static inline SIMD_CFUNC simd_double4 __tg_fdim(simd_double4 x, simd_double4 y) { return simd_bitselect(x-y, 0, x<y); } +static inline SIMD_CFUNC simd_double8 __tg_fdim(simd_double8 x, simd_double8 y) { return simd_bitselect(x-y, 0, x<y); } + +static inline SIMD_CFUNC simd_float2 __tg_fma(simd_float2 x, simd_float2 y, simd_float2 z) { +#if defined __arm64__ || defined __ARM_VFPV4__ + return vfma_f32(z, x, y); +#else + return simd_make_float2(__tg_fma(simd_make_float4_undef(x), simd_make_float4_undef(y), simd_make_float4_undef(z))); +#endif +} + +static inline SIMD_CFUNC simd_float3 __tg_fma(simd_float3 x, simd_float3 y, simd_float3 z) { + return simd_make_float3(__tg_fma(simd_make_float4(x), simd_make_float4(y), simd_make_float4(z))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_fma_f4(simd_float4 x, simd_float4 y, simd_float4 z); +#endif +static inline SIMD_CFUNC simd_float4 __tg_fma(simd_float4 x, simd_float4 y, simd_float4 z) { +#if defined __arm64__ || defined __ARM_VFPV4__ + return vfmaq_f32(z, x, y); +#elif (defined __i386__ || defined __x86_64__) && defined __FMA__ + return _mm_fmadd_ps(x, y, z); +#elif SIMD_LIBRARY_VERSION >= 3 + return _simd_fma_f4(x, y, z); +#else + return simd_make_float4(fma(x.x, y.x, z.x), fma(x.y, y.y, z.y), fma(x.z, y.z, z.z), fma(x.w, y.w, z.w)); +#endif +} + +static inline SIMD_CFUNC simd_float8 __tg_fma(simd_float8 x, simd_float8 y, simd_float8 z) { +#if (defined __i386__ || defined __x86_64__) && defined __FMA__ + return _mm256_fmadd_ps(x, y, z); +#else + return simd_make_float8(__tg_fma(x.lo, y.lo, z.lo), __tg_fma(x.hi, y.hi, z.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float16 __tg_fma(simd_float16 x, simd_float16 y, simd_float16 z) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_fmadd_ps(x, y, z); +#else + return simd_make_float16(__tg_fma(x.lo, y.lo, z.lo), __tg_fma(x.hi, y.hi, z.hi)); +#endif +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_fma_d2(simd_double2 x, simd_double2 y, simd_double2 z); +#endif +static inline SIMD_CFUNC simd_double2 __tg_fma(simd_double2 x, simd_double2 y, simd_double2 z) { +#if defined __arm64__ + return vfmaq_f64(z, x, y); +#elif (defined __i386__ || defined __x86_64__) && defined __FMA__ + return _mm_fmadd_pd(x, y, z); +#elif SIMD_LIBRARY_VERSION >= 3 + return _simd_fma_d2(x, y, z); +#else + return simd_make_double2(fma(x.x, y.x, z.x), fma(x.y, y.y, z.y)); +#endif +} + +static inline SIMD_CFUNC simd_double3 __tg_fma(simd_double3 x, simd_double3 y, simd_double3 z) { + return simd_make_double3(__tg_fma(simd_make_double4(x), simd_make_double4(y), simd_make_double4(z))); +} + +static inline SIMD_CFUNC 
simd_double4 __tg_fma(simd_double4 x, simd_double4 y, simd_double4 z) { +#if (defined __i386__ || defined __x86_64__) && defined __FMA__ + return _mm256_fmadd_pd(x, y, z); +#else + return simd_make_double4(__tg_fma(x.lo, y.lo, z.lo), __tg_fma(x.hi, y.hi, z.hi)); +#endif +} + +static inline SIMD_CFUNC simd_double8 __tg_fma(simd_double8 x, simd_double8 y, simd_double8 z) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_fmadd_pd(x, y, z); +#else + return simd_make_double8(__tg_fma(x.lo, y.lo, z.lo), __tg_fma(x.hi, y.hi, z.hi)); +#endif +} + +static inline SIMD_CFUNC float simd_muladd(float x, float y, float z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC simd_float2 simd_muladd(simd_float2 x, simd_float2 y, simd_float2 z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC simd_float3 simd_muladd(simd_float3 x, simd_float3 y, simd_float3 z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC simd_float4 simd_muladd(simd_float4 x, simd_float4 y, simd_float4 z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC simd_float8 simd_muladd(simd_float8 x, simd_float8 y, simd_float8 z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC simd_float16 simd_muladd(simd_float16 x, simd_float16 y, simd_float16 z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC double simd_muladd(double x, double y, double z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC simd_double2 simd_muladd(simd_double2 x, simd_double2 y, simd_double2 z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC simd_double3 simd_muladd(simd_double3 x, simd_double3 y, simd_double3 z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC simd_double4 simd_muladd(simd_double4 x, simd_double4 y, simd_double4 z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +static inline SIMD_CFUNC simd_double8 simd_muladd(simd_double8 x, simd_double8 y, simd_double8 z) { +#pragma STDC FP_CONTRACT ON + return x*y + z; +} +#ifdef __cplusplus +} /* extern "C" */ +#endif +#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif /* SIMD_MATH_HEADER */
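+/* Note: the FP_CONTRACT pragma in the simd_muladd functions above permits,
+   but does not require, the compiler to fuse x*y + z into a single fused
+   multiply-add; use the __tg_fma functions when a guaranteed fused (singly
+   rounded) result is required. A minimal sketch, for some simd_float4
+   values x, y, z:
+
+       simd_float4 a = simd_muladd(x, y, z); // fma or mul+add, per target
+       simd_float4 b = __tg_fma(x, y, z);    // always a single fused operation
+ */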
\ No newline at end of file
diff --git a/lib/libc/include/aarch64-macos-gnu/simd/packed.h b/lib/libc/include/aarch64-macos-gnu/simd/packed.h
new file mode 100644
index 0000000000..ddbd861090
--- /dev/null
+++ b/lib/libc/include/aarch64-macos-gnu/simd/packed.h
@@ -0,0 +1,1031 @@
+/*! @header
+ * This header defines fixed-size vector types with relaxed alignment. For
+ * each vector type defined by <simd/vector_types.h> that is not a 1- or 3-
+ * element vector, there is a corresponding type defined by this header that
+ * requires only the alignment matching that of the underlying scalar type.
+ *
+ * These types should be used to access buffers that may not be sufficiently
+ * aligned to allow them to be accessed using the "normal" simd vector types.
+ * As an example of this usage, suppose that you want to load a vector of
+ * four floats from an array of floats. The type simd_float4 has sixteen byte
+ * alignment, whereas an array of floats has only four byte alignment.
+ * Thus, naively casting a pointer into the array to (simd_float4 *) would
+ * invoke undefined behavior, and likely produce an alignment fault at
+ * runtime. Instead, use the corresponding packed type to load from the array:
+ *
+ * <pre>
+ * @textblock
+ * simd_float4 vector = *(simd_packed_float4 *)&array[i];
+ * // do something with vector ...
+ * @/textblock
+ * </pre>
+ *
+ * It's important to note that the packed types are only needed to work with
+ * memory; once the data is loaded, we simply operate on it as usual using
+ * the simd_float4 type, as illustrated above.
+ *
+ * @copyright 2014-2017 Apple, Inc. All rights reserved.
+ * @unsorted */
+
+#ifndef SIMD_PACKED_TYPES
+#define SIMD_PACKED_TYPES
+
+# include <simd/vector_types.h>
+# if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+/*! @abstract A vector of two 8-bit signed (twos-complement) integers with
+ * relaxed alignment.
+ * @description In C++ and Metal, this type is also available as
+ * simd::packed::char2. The alignment of this type is that of the
+ * underlying scalar element type, so you can use it to load or store from
+ * an array of that type. */
+typedef __attribute__((__ext_vector_type__(2),__aligned__(1))) char simd_packed_char2;
+
+/*! @abstract A vector of four 8-bit signed (twos-complement) integers with
+ * relaxed alignment.
+ * @description In C++ and Metal, this type is also available as
+ * simd::packed::char4. The alignment of this type is that of the
+ * underlying scalar element type, so you can use it to load or store from
+ * an array of that type. */
+typedef __attribute__((__ext_vector_type__(4),__aligned__(1))) char simd_packed_char4;
+
+/*! @abstract A vector of eight 8-bit signed (twos-complement) integers with
+ * relaxed alignment.
+ * @description In C++ this type is also available as simd::packed::char8.
+ * This type is not available in Metal. The alignment of this type is only
+ * that of the underlying scalar element type, so you can use it to load or
+ * store from an array of that type. */
+typedef __attribute__((__ext_vector_type__(8),__aligned__(1))) char simd_packed_char8;
+
+/*! @abstract A vector of sixteen 8-bit signed (twos-complement) integers
+ * with relaxed alignment.
+ * @description In C++ this type is also available as simd::packed::char16.
+ * This type is not available in Metal. The alignment of this type is only
+ * that of the underlying scalar element type, so you can use it to load or
+ * store from an array of that type.
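+ *
+ * For example, a sketch of a relaxed-alignment load (the buffer name and
+ * index are illustrative):
+ *
+ * <pre>
+ * @textblock
+ * char bytes[17];
+ * simd_char16 v = *(simd_packed_char16 *)&bytes[1]; // 1-byte alignment ok
+ * @/textblock
+ * </pre>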
*/ +typedef __attribute__((__ext_vector_type__(16),__aligned__(1))) char simd_packed_char16; + +/*! @abstract A vector of thirty-two 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as simd::packed::char32. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(1))) char simd_packed_char32; + +/*! @abstract A vector of sixty-four 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as simd::packed::char64. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(64),__aligned__(1))) char simd_packed_char64; + +/*! @abstract A vector of two 8-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::uchar2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(1))) unsigned char simd_packed_uchar2; + +/*! @abstract A vector of four 8-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::uchar4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(1))) unsigned char simd_packed_uchar4; + +/*! @abstract A vector of eight 8-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as simd::packed::uchar8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(1))) unsigned char simd_packed_uchar8; + +/*! @abstract A vector of sixteen 8-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::uchar16. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(1))) unsigned char simd_packed_uchar16; + +/*! @abstract A vector of thirty-two 8-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::uchar32. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(1))) unsigned char simd_packed_uchar32; + +/*! @abstract A vector of sixty-four 8-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::uchar64. This type is not available in Metal. 
The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(64),__aligned__(1))) unsigned char simd_packed_uchar64; + +/*! @abstract A vector of two 16-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::short2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(2))) short simd_packed_short2; + +/*! @abstract A vector of four 16-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::short4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(2))) short simd_packed_short4; + +/*! @abstract A vector of eight 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as simd::packed::short8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(2))) short simd_packed_short8; + +/*! @abstract A vector of sixteen 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as + * simd::packed::short16. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(2))) short simd_packed_short16; + +/*! @abstract A vector of thirty-two 16-bit signed (twos-complement) + * integers with relaxed alignment. + * @description In C++ this type is also available as + * simd::packed::short32. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(2))) short simd_packed_short32; + +/*! @abstract A vector of two 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::ushort2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(2))) unsigned short simd_packed_ushort2; + +/*! @abstract A vector of four 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::ushort4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(2))) unsigned short simd_packed_ushort4; + +/*! @abstract A vector of eight 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::ushort8. 
This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(2))) unsigned short simd_packed_ushort8; + +/*! @abstract A vector of sixteen 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::ushort16. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(2))) unsigned short simd_packed_ushort16; + +/*! @abstract A vector of thirty-two 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::ushort32. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(2))) unsigned short simd_packed_ushort32; + +/*! @abstract A vector of two 32-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::int2. The alignment of this type is that of the underlying + * scalar element type, so you can use it to load or store from an array of + * that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) int simd_packed_int2; + +/*! @abstract A vector of four 32-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::int4. The alignment of this type is that of the underlying + * scalar element type, so you can use it to load or store from an array of + * that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) int simd_packed_int4; + +/*! @abstract A vector of eight 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as simd::packed::int8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) int simd_packed_int8; + +/*! @abstract A vector of sixteen 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as simd::packed::int16. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(4))) int simd_packed_int16; + +/*! @abstract A vector of two 32-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::uint2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) unsigned int simd_packed_uint2; + +/*! @abstract A vector of four 32-bit unsigned integers with relaxed + * alignment. 
+ * @description In C++ and Metal, this type is also available as + * simd::packed::uint4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) unsigned int simd_packed_uint4; + +/*! @abstract A vector of eight 32-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as simd::packed::uint8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) unsigned int simd_packed_uint8; + +/*! @abstract A vector of sixteen 32-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as simd::packed::uint16. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(4))) unsigned int simd_packed_uint16; + +/*! @abstract A vector of two 32-bit floating-point numbers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::float2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) float simd_packed_float2; + +/*! @abstract A vector of four 32-bit floating-point numbers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::float4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) float simd_packed_float4; + +/*! @abstract A vector of eight 32-bit floating-point numbers with relaxed + * alignment. + * @description In C++ this type is also available as simd::packed::float8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) float simd_packed_float8; + +/*! @abstract A vector of sixteen 32-bit floating-point numbers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::float16. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(4))) float simd_packed_float16; + +/*! @abstract A vector of two 64-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::long2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. 
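+ * (As the conditional compilation below selects, that alignment is eight
+ * bytes on LP64 targets and four bytes otherwise.)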
*/ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(2),__aligned__(8))) simd_long1 simd_packed_long2; +#else +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) simd_long1 simd_packed_long2; +#endif + +/*! @abstract A vector of four 64-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::long4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(4),__aligned__(8))) simd_long1 simd_packed_long4; +#else +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) simd_long1 simd_packed_long4; +#endif + +/*! @abstract A vector of eight 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as simd::packed::long8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(8),__aligned__(8))) simd_long1 simd_packed_long8; +#else +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) simd_long1 simd_packed_long8; +#endif + +/*! @abstract A vector of two 64-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::ulong2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(2),__aligned__(8))) simd_ulong1 simd_packed_ulong2; +#else +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) simd_ulong1 simd_packed_ulong2; +#endif + +/*! @abstract A vector of four 64-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::ulong4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(4),__aligned__(8))) simd_ulong1 simd_packed_ulong4; +#else +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) simd_ulong1 simd_packed_ulong4; +#endif + +/*! @abstract A vector of eight 64-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as simd::packed::ulong8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(8),__aligned__(8))) simd_ulong1 simd_packed_ulong8; +#else +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) simd_ulong1 simd_packed_ulong8; +#endif + +/*! @abstract A vector of two 64-bit floating-point numbers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::double2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. 
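+ *
+ * A sketch of the typical use, loading from a position that is only
+ * 8-byte aligned (names are illustrative):
+ *
+ * <pre>
+ * @textblock
+ * double a[3] = {0.0, 1.0, 2.0};
+ * simd_double2 v = *(simd_packed_double2 *)&a[1];
+ * @/textblock
+ * </pre>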
*/ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(2),__aligned__(8))) double simd_packed_double2; +#else +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) double simd_packed_double2; +#endif + +/*! @abstract A vector of four 64-bit floating-point numbers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::double4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(4),__aligned__(8))) double simd_packed_double4; +#else +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) double simd_packed_double4; +#endif + +/*! @abstract A vector of eight 64-bit floating-point numbers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::double8. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(8),__aligned__(8))) double simd_packed_double8; +#else +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) double simd_packed_double8; +#endif + +/* MARK: C++ vector types */ +#if defined __cplusplus +namespace simd { + namespace packed { + /*! @abstract A vector of two 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_char2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_char2 char2; + + /*! @abstract A vector of four 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_char4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_char4 char4; + + /*! @abstract A vector of eight 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_char8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_char8 char8; + + /*! @abstract A vector of sixteen 8-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_char16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_char16 char16; + + /*! @abstract A vector of thirty-two 8-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_char32. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_char32 char32; + + /*! 
@abstract A vector of sixty-four 8-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_char64. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_char64 char64; + + /*! @abstract A vector of two 8-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_uchar2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_uchar2 uchar2; + + /*! @abstract A vector of four 8-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_uchar4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_uchar4 uchar4; + + /*! @abstract A vector of eight 8-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uchar8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uchar8 uchar8; + + /*! @abstract A vector of sixteen 8-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uchar16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uchar16 uchar16; + + /*! @abstract A vector of thirty-two 8-bit unsigned integers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uchar32. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uchar32 uchar32; + + /*! @abstract A vector of sixty-four 8-bit unsigned integers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uchar64. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uchar64 uchar64; + + /*! @abstract A vector of two 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_short2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_short2 short2; + + /*! @abstract A vector of four 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_short4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. 
*/ +typedef ::simd_packed_short4 short4; + + /*! @abstract A vector of eight 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_short8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_short8 short8; + + /*! @abstract A vector of sixteen 16-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_short16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_short16 short16; + + /*! @abstract A vector of thirty-two 16-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_short32. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_short32 short32; + + /*! @abstract A vector of two 16-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_ushort2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_ushort2 ushort2; + + /*! @abstract A vector of four 16-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_ushort4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_ushort4 ushort4; + + /*! @abstract A vector of eight 16-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_ushort8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_ushort8 ushort8; + + /*! @abstract A vector of sixteen 16-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_ushort16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_ushort16 ushort16; + + /*! @abstract A vector of thirty-two 16-bit unsigned integers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_ushort32. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_ushort32 ushort32; + + /*! @abstract A vector of two 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_int2. 
The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_int2 int2; + + /*! @abstract A vector of four 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_int4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_int4 int4; + + /*! @abstract A vector of eight 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_int8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_int8 int8; + + /*! @abstract A vector of sixteen 32-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_int16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_int16 int16; + + /*! @abstract A vector of two 32-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_uint2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_uint2 uint2; + + /*! @abstract A vector of four 32-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_uint4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_uint4 uint4; + + /*! @abstract A vector of eight 32-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uint8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uint8 uint8; + + /*! @abstract A vector of sixteen 32-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uint16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uint16 uint16; + + /*! @abstract A vector of two 32-bit floating-point numbers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_float2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_float2 float2; + + /*! @abstract A vector of four 32-bit floating-point numbers with + * relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_float4. 
The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_float4 float4; + + /*! @abstract A vector of eight 32-bit floating-point numbers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_float8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_float8 float8; + + /*! @abstract A vector of sixteen 32-bit floating-point numbers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_float16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_float16 float16; + + /*! @abstract A vector of two 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_long2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_long2 long2; + + /*! @abstract A vector of four 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_long4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_long4 long4; + + /*! @abstract A vector of eight 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_long8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_long8 long8; + + /*! @abstract A vector of two 64-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_ulong2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_ulong2 ulong2; + + /*! @abstract A vector of four 64-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_ulong4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_ulong4 ulong4; + + /*! @abstract A vector of eight 64-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_ulong8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_ulong8 ulong8; + + /*! @abstract A vector of two 64-bit floating-point numbers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_double2. 
The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_double2 double2; + + /*! @abstract A vector of four 64-bit floating-point numbers with + * relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_double4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_double4 double4; + + /*! @abstract A vector of eight 64-bit floating-point numbers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_double8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_double8 double8; + + } /* namespace simd::packed:: */ +} /* namespace simd:: */ +#endif /* __cplusplus */ + +/* MARK: Deprecated vector types */ +/*! @group Deprecated vector types + * @discussion These are the original types used by earlier versions of the + * simd library; they are provided here for compatibility with existing source + * files. Use the new ("simd_"-prefixed) types for future development. */ +/*! @abstract A vector of two 8-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char2 + * or simd::packed::char2 instead. */ +typedef simd_packed_char2 packed_char2; + +/*! @abstract A vector of four 8-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char4 + * or simd::packed::char4 instead. */ +typedef simd_packed_char4 packed_char4; + +/*! @abstract A vector of eight 8-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char8 + * or simd::packed::char8 instead. */ +typedef simd_packed_char8 packed_char8; + +/*! @abstract A vector of sixteen 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char16 + * or simd::packed::char16 instead. */ +typedef simd_packed_char16 packed_char16; + +/*! @abstract A vector of thirty-two 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char32 + * or simd::packed::char32 instead. */ +typedef simd_packed_char32 packed_char32; + +/*! @abstract A vector of sixty-four 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char64 + * or simd::packed::char64 instead. */ +typedef simd_packed_char64 packed_char64; + +/*! @abstract A vector of two 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar2 + * or simd::packed::uchar2 instead. */ +typedef simd_packed_uchar2 packed_uchar2; + +/*! @abstract A vector of four 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar4 + * or simd::packed::uchar4 instead. */ +typedef simd_packed_uchar4 packed_uchar4; + +/*! @abstract A vector of eight 8-bit unsigned integers with relaxed + * alignment.
+ * @description This type is deprecated; you should use simd_packed_uchar8 + * or simd::packed::uchar8 instead. */ +typedef simd_packed_uchar8 packed_uchar8; + +/*! @abstract A vector of sixteen 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar16 + * or simd::packed::uchar16 instead. */ +typedef simd_packed_uchar16 packed_uchar16; + +/*! @abstract A vector of thirty-two 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar32 + * or simd::packed::uchar32 instead. */ +typedef simd_packed_uchar32 packed_uchar32; + +/*! @abstract A vector of sixty-four 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar64 + * or simd::packed::uchar64 instead. */ +typedef simd_packed_uchar64 packed_uchar64; + +/*! @abstract A vector of two 16-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short2 + * or simd::packed::short2 instead. */ +typedef simd_packed_short2 packed_short2; + +/*! @abstract A vector of four 16-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short4 + * or simd::packed::short4 instead. */ +typedef simd_packed_short4 packed_short4; + +/*! @abstract A vector of eight 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short8 + * or simd::packed::short8 instead. */ +typedef simd_packed_short8 packed_short8; + +/*! @abstract A vector of sixteen 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short16 + * or simd::packed::short16 instead. */ +typedef simd_packed_short16 packed_short16; + +/*! @abstract A vector of thirty-two 16-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short32 + * or simd::packed::short32 instead. */ +typedef simd_packed_short32 packed_short32; + +/*! @abstract A vector of two 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ushort2 + * or simd::packed::ushort2 instead. */ +typedef simd_packed_ushort2 packed_ushort2; + +/*! @abstract A vector of four 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ushort4 + * or simd::packed::ushort4 instead. */ +typedef simd_packed_ushort4 packed_ushort4; + +/*! @abstract A vector of eight 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ushort8 + * or simd::packed::ushort8 instead. */ +typedef simd_packed_ushort8 packed_ushort8; + +/*! @abstract A vector of sixteen 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use + * simd_packed_ushort16 or simd::packed::ushort16 instead. */ +typedef simd_packed_ushort16 packed_ushort16; + +/*! @abstract A vector of thirty-two 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use + * simd_packed_ushort32 or simd::packed::ushort32 instead. */ +typedef simd_packed_ushort32 packed_ushort32; + +/*! 
@abstract A vector of two 32-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_int2 or + * simd::packed::int2 instead. */ +typedef simd_packed_int2 packed_int2; + +/*! @abstract A vector of four 32-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_int4 or + * simd::packed::int4 instead. */ +typedef simd_packed_int4 packed_int4; + +/*! @abstract A vector of eight 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_int8 or + * simd::packed::int8 instead. */ +typedef simd_packed_int8 packed_int8; + +/*! @abstract A vector of sixteen 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_int16 + * or simd::packed::int16 instead. */ +typedef simd_packed_int16 packed_int16; + +/*! @abstract A vector of two 32-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uint2 + * or simd::packed::uint2 instead. */ +typedef simd_packed_uint2 packed_uint2; + +/*! @abstract A vector of four 32-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uint4 + * or simd::packed::uint4 instead. */ +typedef simd_packed_uint4 packed_uint4; + +/*! @abstract A vector of eight 32-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uint8 + * or simd::packed::uint8 instead. */ +typedef simd_packed_uint8 packed_uint8; + +/*! @abstract A vector of sixteen 32-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uint16 + * or simd::packed::uint16 instead. */ +typedef simd_packed_uint16 packed_uint16; + +/*! @abstract A vector of two 32-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_float2 + * or simd::packed::float2 instead. */ +typedef simd_packed_float2 packed_float2; + +/*! @abstract A vector of four 32-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_float4 + * or simd::packed::float4 instead. */ +typedef simd_packed_float4 packed_float4; + +/*! @abstract A vector of eight 32-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_float8 + * or simd::packed::float8 instead. */ +typedef simd_packed_float8 packed_float8; + +/*! @abstract A vector of sixteen 32-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_float16 + * or simd::packed::float16 instead. */ +typedef simd_packed_float16 packed_float16; + +/*! @abstract A vector of two 64-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_long2 + * or simd::packed::long2 instead. */ +typedef simd_packed_long2 packed_long2; + +/*! @abstract A vector of four 64-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_long4 + * or simd::packed::long4 instead. */ +typedef simd_packed_long4 packed_long4; + +/*! 
@abstract A vector of eight 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_long8 + * or simd::packed::long8 instead. */ +typedef simd_packed_long8 packed_long8; + +/*! @abstract A vector of two 64-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ulong2 + * or simd::packed::ulong2 instead. */ +typedef simd_packed_ulong2 packed_ulong2; + +/*! @abstract A vector of four 64-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ulong4 + * or simd::packed::ulong4 instead. */ +typedef simd_packed_ulong4 packed_ulong4; + +/*! @abstract A vector of eight 64-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ulong8 + * or simd::packed::ulong8 instead. */ +typedef simd_packed_ulong8 packed_ulong8; + +/*! @abstract A vector of two 64-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_double2 + * or simd::packed::double2 instead. */ +typedef simd_packed_double2 packed_double2; + +/*! @abstract A vector of four 64-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_double4 + * or simd::packed::double4 instead. */ +typedef simd_packed_double4 packed_double4; + +/*! @abstract A vector of eight 64-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_double8 + * or simd::packed::double8 instead. */ +typedef simd_packed_double8 packed_double8; + +# endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif
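The relaxed-alignment guarantee documented above is the whole point of the packed types: they let you do vector loads and stores on arrays that are only aligned for the scalar element type. A minimal sketch of the idiom follows (not part of the header; `sum4` is a hypothetical helper introduced here for illustration):

    #include <stddef.h>
    #include <simd/simd.h>

    /* Sums each group of four floats in an arbitrary float array. v + i is
     * only guaranteed 4-byte alignment, so we load through the packed type;
     * dereferencing a simd_float4 pointer here would wrongly assume 16-byte
     * alignment. The packed value converts implicitly to simd_float4. */
    static float sum4(const float *v, size_t n) {
      float total = 0;
      for (size_t i = 0; i + 4 <= n; i += 4) {
        simd_float4 x = *(const simd_packed_float4 *)(v + i);
        total += simd_reduce_add(x);
      }
      return total;
    }

The quaternion constructors in the next header use exactly this pattern to read a simd_packed_float4 from a plain scalar array.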
\ No newline at end of file diff --git a/lib/libc/include/aarch64-macos-gnu/simd/quaternion.h b/lib/libc/include/aarch64-macos-gnu/simd/quaternion.h new file mode 100644 index 0000000000..b7c5e2909d --- /dev/null +++ b/lib/libc/include/aarch64-macos-gnu/simd/quaternion.h @@ -0,0 +1,1194 @@ +/*! @header + * This header defines functions for constructing and using quaternions. + * @copyright 2015-2016 Apple, Inc. All rights reserved. + * @unsorted */ + +#ifndef SIMD_QUATERNIONS +#define SIMD_QUATERNIONS + +#include <simd/base.h> +#if SIMD_COMPILER_HAS_REQUIRED_FEATURES +#include <simd/vector.h> +#include <simd/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* MARK: - C and Objective-C float interfaces */ + +/*! @abstract Constructs a quaternion from four scalar values. + * + * @param ix The first component of the imaginary (vector) part. + * @param iy The second component of the imaginary (vector) part. + * @param iz The third component of the imaginary (vector) part. + * + * @param r The real (scalar) part. */ +static inline SIMD_CFUNC simd_quatf simd_quaternion(float ix, float iy, float iz, float r) { + return (simd_quatf){ { ix, iy, iz, r } }; +} + +/*! @abstract Constructs a quaternion from an array of four scalars. + * + * @discussion Note that the imaginary part of the quaternion comes from + * array elements 0, 1, and 2, and the real part comes from element 3. */ +static inline SIMD_NONCONST simd_quatf simd_quaternion(const float xyzr[4]) { + return (simd_quatf){ *(const simd_packed_float4 *)xyzr }; +} + +/*! @abstract Constructs a quaternion from a four-element vector. + * + * @discussion Note that the imaginary (vector) part of the quaternion comes + * from lanes 0, 1, and 2 of the vector, and the real (scalar) part comes from + * lane 3. */ +static inline SIMD_CFUNC simd_quatf simd_quaternion(simd_float4 xyzr) { + return (simd_quatf){ xyzr }; +} + +/*! @abstract Constructs a quaternion that rotates by `angle` radians about + * `axis`. */ +static inline SIMD_CFUNC simd_quatf simd_quaternion(float angle, simd_float3 axis); + +/*! @abstract Construct a quaternion that rotates from one vector to another. + * + * @param from A normalized three-element vector. + * @param to A normalized three-element vector. + * + * @discussion The rotation axis is `simd_cross(from, to)`. If `from` and + * `to` point in opposite directions (to within machine precision), an + * arbitrary rotation axis is chosen, and the angle is pi radians. */ +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float3 from, simd_float3 to); + +/*! @abstract Construct a quaternion from a 3x3 rotation `matrix`. + * + * @discussion If `matrix` is not orthogonal with determinant 1, the result + * is undefined. */ +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float3x3 matrix); + +/*! @abstract Construct a quaternion from a 4x4 rotation `matrix`. + * + * @discussion The last row and column of the matrix are ignored. This + * function is equivalent to calling simd_quaternion with the upper-left 3x3 + * submatrix. */ +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float4x4 matrix); + +/*! @abstract The real (scalar) part of the quaternion `q`. */ +static inline SIMD_CFUNC float simd_real(simd_quatf q) { + return q.vector.w; +} + +/*! @abstract The imaginary (vector) part of the quaternion `q`. */ +static inline SIMD_CFUNC simd_float3 simd_imag(simd_quatf q) { + return q.vector.xyz; +} + +/*! @abstract The angle (in radians) of rotation represented by `q`.
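+ * For example, for any unit-length `axis` and any `t` in [0, pi], + * simd_angle(simd_quaternion(t, axis)) recovers `t` (up to rounding).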
*/ +static inline SIMD_CFUNC float simd_angle(simd_quatf q); + +/*! @abstract The normalized axis (a 3-element vector) around which the + * action of the quaternion `q` rotates. */ +static inline SIMD_CFUNC simd_float3 simd_axis(simd_quatf q); + +/*! @abstract The sum of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatf simd_add(simd_quatf p, simd_quatf q); + +/*! @abstract The difference of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatf simd_sub(simd_quatf p, simd_quatf q); + +/*! @abstract The product of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatf simd_mul(simd_quatf p, simd_quatf q); + +/*! @abstract The quaternion `q` scaled by the real value `a`. */ +static inline SIMD_CFUNC simd_quatf simd_mul(simd_quatf q, float a); + +/*! @abstract The quaternion `q` scaled by the real value `a`. */ +static inline SIMD_CFUNC simd_quatf simd_mul(float a, simd_quatf q); + +/*! @abstract The conjugate of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatf simd_conjugate(simd_quatf q); + +/*! @abstract The (multiplicative) inverse of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatf simd_inverse(simd_quatf q); + +/*! @abstract The negation (additive inverse) of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatf simd_negate(simd_quatf q); + +/*! @abstract The dot product of the quaternions `p` and `q` interpreted as + * four-dimensional vectors. */ +static inline SIMD_CFUNC float simd_dot(simd_quatf p, simd_quatf q); + +/*! @abstract The length of the quaternion `q`. */ +static inline SIMD_CFUNC float simd_length(simd_quatf q); + +/*! @abstract The unit quaternion obtained by normalizing `q`. */ +static inline SIMD_CFUNC simd_quatf simd_normalize(simd_quatf q); + +/*! @abstract Rotates the vector `v` by the quaternion `q`. */ +static inline SIMD_CFUNC simd_float3 simd_act(simd_quatf q, simd_float3 v); + +/*! @abstract Logarithm of the quaternion `q`. + * @discussion Do not call this function directly; use `log(q)` instead. + * + * We can write a quaternion `q` in the form: `r(cos(t) + sin(t)v)` where + * `r` is the length of `q`, `t` is an angle, and `v` is a unit 3-vector. + * The logarithm of `q` is `log(r) + tv`, just like the logarithm of the + * complex number `r*(cos(t) + i sin(t))` is `log(r) + it`. + * + * Note that this function is not robust against poorly-scaled non-unit + * quaternions, because it is primarily used for spline interpolation of + * unit quaternions. If you need to compute a robust logarithm of general + * quaternions, you can use the following approach: + * + * scale = simd_reduce_max(simd_abs(q.vector)); + * logq = log(simd_recip(scale)*q); + * logq.real += log(scale); + * return logq; */ +static SIMD_NOINLINE simd_quatf __tg_log(simd_quatf q); + +/*! @abstract Inverse of `log( )`; the exponential map on quaternions. + * @discussion Do not call this function directly; use `exp(q)` instead. */ +static SIMD_NOINLINE simd_quatf __tg_exp(simd_quatf q); + +/*! @abstract Spherical linear interpolation along the shortest arc between + * quaternions `q0` and `q1`. */ +static SIMD_NOINLINE simd_quatf simd_slerp(simd_quatf q0, simd_quatf q1, float t); + +/*! @abstract Spherical linear interpolation along the longest arc between + * quaternions `q0` and `q1`. */ +static SIMD_NOINLINE simd_quatf simd_slerp_longest(simd_quatf q0, simd_quatf q1, float t); + +/*! @abstract Interpolate between quaternions along a spherical cubic spline. + * + * @discussion The function interpolates between q1 and q2. 
q0 is the left + * endpoint of the previous interval, and q3 is the right endpoint of the next + * interval. Use this function to smoothly interpolate between a sequence of + * rotations. */ +static SIMD_NOINLINE simd_quatf simd_spline(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t); + +/*! @abstract Spherical cubic Bezier interpolation between quaternions. + * + * @discussion The function treats q0 ... q3 as control points and uses slerp + * in place of lerp in the De Casteljau algorithm. The endpoints of + * interpolation are thus q0 and q3, and the curve will not generally pass + * through q1 or q2. Note that the convex hull property of "standard" Bezier + * curves does not hold on the sphere. */ +static SIMD_NOINLINE simd_quatf simd_bezier(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t); + +#ifdef __cplusplus +} /* extern "C" */ +/* MARK: - C++ float interfaces */ + +namespace simd { + struct quatf : ::simd_quatf { + /*! @abstract The identity quaternion. */ + quatf( ) : ::simd_quatf(::simd_quaternion((float4){0,0,0,1})) { } + + /*! @abstract Constructs a C++ quaternion from a C quaternion. */ + quatf(::simd_quatf q) : ::simd_quatf(q) { } + + /*! @abstract Constructs a quaternion from components. */ + quatf(float ix, float iy, float iz, float r) : ::simd_quatf(::simd_quaternion(ix, iy, iz, r)) { } + + /*! @abstract Constructs a quaternion from an array of scalars. */ + quatf(const float xyzr[4]) : ::simd_quatf(::simd_quaternion(xyzr)) { } + + /*! @abstract Constructs a quaternion from a vector. */ + quatf(float4 xyzr) : ::simd_quatf(::simd_quaternion(xyzr)) { } + + /*! @abstract Quaternion representing rotation about `axis` by `angle` + * radians. */ + quatf(float angle, float3 axis) : ::simd_quatf(::simd_quaternion(angle, axis)) { } + + /*! @abstract Quaternion that rotates `from` into `to`. */ + quatf(float3 from, float3 to) : ::simd_quatf(::simd_quaternion(from, to)) { } + + /*! @abstract Constructs a quaternion from a rotation matrix. */ + quatf(::simd_float3x3 matrix) : ::simd_quatf(::simd_quaternion(matrix)) { } + + /*! @abstract Constructs a quaternion from a rotation matrix. */ + quatf(::simd_float4x4 matrix) : ::simd_quatf(::simd_quaternion(matrix)) { } + + /*! @abstract The real (scalar) part of the quaternion. */ + float real(void) const { return ::simd_real(*this); } + + /*! @abstract The imaginary (vector) part of the quaternion. */ + float3 imag(void) const { return ::simd_imag(*this); } + + /*! @abstract The angle the quaternion rotates by. */ + float angle(void) const { return ::simd_angle(*this); } + + /*! @abstract The axis the quaternion rotates about. */ + float3 axis(void) const { return ::simd_axis(*this); } + + /*! @abstract The length of the quaternion. */ + float length(void) const { return ::simd_length(*this); } + + /*! @abstract Act on the vector `v` by rotation.
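+ * For example, rotating (1, 0, 0) by quatf(M_PI/2, (float3){0, 0, 1}) + * gives approximately (0, 1, 0): a quarter turn about the z axis carries + * the x axis to the y axis.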
*/ + float3 operator()(const ::simd_float3 v) const { return ::simd_act(*this, v); } + }; + + static SIMD_CPPFUNC quatf operator+(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_add(p, q); } + static SIMD_CPPFUNC quatf operator-(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_sub(p, q); } + static SIMD_CPPFUNC quatf operator-(const ::simd_quatf p) { return ::simd_negate(p); } + static SIMD_CPPFUNC quatf operator*(const float r, const ::simd_quatf p) { return ::simd_mul(r, p); } + static SIMD_CPPFUNC quatf operator*(const ::simd_quatf p, const float r) { return ::simd_mul(p, r); } + static SIMD_CPPFUNC quatf operator*(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_mul(p, q); } + static SIMD_CPPFUNC quatf operator/(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_mul(p, ::simd_inverse(q)); } + static SIMD_CPPFUNC quatf operator+=(quatf &p, const ::simd_quatf q) { return p = p+q; } + static SIMD_CPPFUNC quatf operator-=(quatf &p, const ::simd_quatf q) { return p = p-q; } + static SIMD_CPPFUNC quatf operator*=(quatf &p, const float r) { return p = p*r; } + static SIMD_CPPFUNC quatf operator*=(quatf &p, const ::simd_quatf q) { return p = p*q; } + static SIMD_CPPFUNC quatf operator/=(quatf &p, const ::simd_quatf q) { return p = p/q; } + + /*! @abstract The conjugate of the quaternion `q`. */ + static SIMD_CPPFUNC quatf conjugate(const ::simd_quatf p) { return ::simd_conjugate(p); } + + /*! @abstract The (multiplicative) inverse of the quaternion `q`. */ + static SIMD_CPPFUNC quatf inverse(const ::simd_quatf p) { return ::simd_inverse(p); } + + /*! @abstract The dot product of the quaternions `p` and `q` interpreted as + * four-dimensional vectors. */ + static SIMD_CPPFUNC float dot(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_dot(p, q); } + + /*! @abstract The unit quaternion obtained by normalizing `q`. */ + static SIMD_CPPFUNC quatf normalize(const ::simd_quatf p) { return ::simd_normalize(p); } + + /*! @abstract Logarithm of the quaternion `q`. */ + static SIMD_CPPFUNC quatf log(const ::simd_quatf q) { return ::__tg_log(q); } + + /*! @abstract Exponential map of the quaternion `q`. */ + static SIMD_CPPFUNC quatf exp(const ::simd_quatf q) { return ::__tg_exp(q); } + + /*! @abstract Spherical linear interpolation along the shortest arc between + * quaternions `q0` and `q1`. */ + static SIMD_CPPFUNC quatf slerp(const ::simd_quatf p0, const ::simd_quatf p1, float t) { return ::simd_slerp(p0, p1, t); } + + /*! @abstract Spherical linear interpolation along the longest arc between + * quaternions `q0` and `q1`. */ + static SIMD_CPPFUNC quatf slerp_longest(const ::simd_quatf p0, const ::simd_quatf p1, float t) { return ::simd_slerp_longest(p0, p1, t); } + + /*! @abstract Interpolate between quaternions along a spherical cubic spline. + * + * @discussion The function interpolates between q1 and q2. q0 is the left + * endpoint of the previous interval, and q3 is the right endpoint of the next + * interval. Use this function to smoothly interpolate between a sequence of + * rotations. */ + static SIMD_CPPFUNC quatf spline(const ::simd_quatf p0, const ::simd_quatf p1, const ::simd_quatf p2, const ::simd_quatf p3, float t) { return ::simd_spline(p0, p1, p2, p3, t); } + + /*! @abstract Spherical cubic Bezier interpolation between quaternions. + * + * @discussion The function treats q0 ... q3 as control points and uses slerp + * in place of lerp in the De Casteljau algorithm.
The endpoints of + * interpolation are thus q0 and q3, and the curve will not generally pass + * through q1 or q2. Note that the convex hull property of "standard" Bezier + * curves does not hold on the sphere. */ + static SIMD_CPPFUNC quatf bezier(const ::simd_quatf p0, const ::simd_quatf p1, const ::simd_quatf p2, const ::simd_quatf p3, float t) { return ::simd_bezier(p0, p1, p2, p3, t); } +} + +extern "C" { +#endif /* __cplusplus */ + +/* MARK: - float implementations */ + +#include <simd/math.h> +#include <simd/geometry.h> + +/* tg_promote is implementation gobbledygook that enables the compile-time + * dispatching in tgmath.h to work its magic. */ +static simd_quatf __attribute__((__overloadable__)) __tg_promote(simd_quatf); + +/*! @abstract Constructs a quaternion from imaginary and real parts. + * @discussion This function is hidden behind an underscore to avoid confusion + * with the angle-axis constructor. */ +static inline SIMD_CFUNC simd_quatf _simd_quaternion(simd_float3 imag, float real) { + return simd_quaternion(simd_make_float4(imag, real)); +} + +static inline SIMD_CFUNC simd_quatf simd_quaternion(float angle, simd_float3 axis) { + return _simd_quaternion(sin(angle/2) * axis, cos(angle/2)); +} + +static inline SIMD_CFUNC float simd_angle(simd_quatf q) { + return 2*atan2(simd_length(q.vector.xyz), q.vector.w); +} + +static inline SIMD_CFUNC simd_float3 simd_axis(simd_quatf q) { + return simd_normalize(q.vector.xyz); +} + +static inline SIMD_CFUNC simd_quatf simd_add(simd_quatf p, simd_quatf q) { + return simd_quaternion(p.vector + q.vector); +} + +static inline SIMD_CFUNC simd_quatf simd_sub(simd_quatf p, simd_quatf q) { + return simd_quaternion(p.vector - q.vector); +} + +static inline SIMD_CFUNC simd_quatf simd_mul(simd_quatf p, simd_quatf q) { + #pragma STDC FP_CONTRACT ON + return simd_quaternion((p.vector.x * __builtin_shufflevector(q.vector, -q.vector, 3,6,1,4) + + p.vector.y * __builtin_shufflevector(q.vector, -q.vector, 2,3,4,5)) + + (p.vector.z * __builtin_shufflevector(q.vector, -q.vector, 5,0,3,6) + + p.vector.w * q.vector)); +} + +static inline SIMD_CFUNC simd_quatf simd_mul(simd_quatf q, float a) { + return simd_quaternion(a * q.vector); +} + +static inline SIMD_CFUNC simd_quatf simd_mul(float a, simd_quatf q) { + return simd_mul(q,a); +} + +static inline SIMD_CFUNC simd_quatf simd_conjugate(simd_quatf q) { + return simd_quaternion(q.vector * (simd_float4){-1,-1,-1, 1}); +} + +static inline SIMD_CFUNC simd_quatf simd_inverse(simd_quatf q) { + return simd_quaternion(simd_conjugate(q).vector * simd_recip(simd_length_squared(q.vector))); +} + +static inline SIMD_CFUNC simd_quatf simd_negate(simd_quatf q) { + return simd_quaternion(-q.vector); +} + +static inline SIMD_CFUNC float simd_dot(simd_quatf p, simd_quatf q) { + return simd_dot(p.vector, q.vector); +} + +static inline SIMD_CFUNC float simd_length(simd_quatf q) { + return simd_length(q.vector); +} + +static inline SIMD_CFUNC simd_quatf simd_normalize(simd_quatf q) { + float length_squared = simd_length_squared(q.vector); + if (length_squared == 0) { + return simd_quaternion((simd_float4){0,0,0,1}); + } + return simd_quaternion(q.vector * simd_rsqrt(length_squared)); +} + +#if defined __arm__ || defined __arm64__ +/*! @abstract Multiplies the vector `v` by the quaternion `q`. + * + * @discussion This IS NOT the action of `q` on `v` (i.e. this is not rotation + * by `q`); that operation is provided by `simd_act(q, v)`. This function is an + * implementation detail and you should not call it directly.
It may be + * removed or modified in future versions of the simd module. */ +static inline SIMD_CFUNC simd_quatf _simd_mul_vq(simd_float3 v, simd_quatf q) { + #pragma STDC FP_CONTRACT ON + return simd_quaternion(v.x * __builtin_shufflevector(q.vector, -q.vector, 3,6,1,4) + + v.y * __builtin_shufflevector(q.vector, -q.vector, 2,3,4,5) + + v.z * __builtin_shufflevector(q.vector, -q.vector, 5,0,3,6)); +} +#endif + +static inline SIMD_CFUNC simd_float3 simd_act(simd_quatf q, simd_float3 v) { +#if defined __arm__ || defined __arm64__ + return simd_mul(q, _simd_mul_vq(v, simd_conjugate(q))).vector.xyz; +#else + #pragma STDC FP_CONTRACT ON + simd_float3 t = 2*simd_cross(simd_imag(q),v); + return v + simd_real(q)*t + simd_cross(simd_imag(q), t); +#endif +} + +static SIMD_NOINLINE simd_quatf __tg_log(simd_quatf q) { + float real = __tg_log(simd_length_squared(q.vector))/2; + if (simd_equal(simd_imag(q), 0)) return _simd_quaternion(0, real); + simd_float3 imag = __tg_acos(simd_real(q)/simd_length(q)) * simd_normalize(simd_imag(q)); + return _simd_quaternion(imag, real); +} + +static SIMD_NOINLINE simd_quatf __tg_exp(simd_quatf q) { + // angle is actually *twice* the angle of the rotation corresponding to + // the resulting quaternion, which is why we don't simply use the (angle, + // axis) constructor to generate `unit`. + float angle = simd_length(simd_imag(q)); + if (angle == 0) return _simd_quaternion(0, exp(simd_real(q))); + simd_float3 axis = simd_normalize(simd_imag(q)); + simd_quatf unit = _simd_quaternion(sin(angle)*axis, cosf(angle)); + return simd_mul(exp(simd_real(q)), unit); +} + +/*! @abstract Implementation detail of the `simd_quaternion(from, to)` + * initializer. + * + * @discussion Computes the quaternion rotation `from` to `to` if they are + * separated by less than 90 degrees. Not numerically stable for larger + * angles. This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static inline SIMD_CFUNC simd_quatf _simd_quaternion_reduced(simd_float3 from, simd_float3 to) { + simd_float3 half = simd_normalize(from + to); + return _simd_quaternion(simd_cross(from, half), simd_dot(from, half)); +} + +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float3 from, simd_float3 to) { + + // If the angle between from and to is not too big, we can compute the + // rotation accurately using a simple implementation. + if (simd_dot(from, to) >= 0) { + return _simd_quaternion_reduced(from, to); + } + + // Because from and to are more than 90 degrees apart, we compute the + // rotation in two stages (from -> half), (half -> to) to preserve numerical + // accuracy. + simd_float3 half = from + to; + + if (simd_length_squared(half) == 0) { + // half is nearly zero, so from and to point in nearly opposite directions + // and the rotation is numerically underspecified. Pick an axis orthogonal + // to the vectors, and use an angle of pi radians. + simd_float3 abs_from = simd_abs(from); + if (abs_from.x <= abs_from.y && abs_from.x <= abs_from.z) + return _simd_quaternion(simd_normalize(simd_cross(from, (simd_float3){1,0,0})), 0.f); + else if (abs_from.y <= abs_from.z) + return _simd_quaternion(simd_normalize(simd_cross(from, (simd_float3){0,1,0})), 0.f); + else + return _simd_quaternion(simd_normalize(simd_cross(from, (simd_float3){0,0,1})), 0.f); + } + + // Compute the two-step rotation. 
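+ // half is nonzero here: the opposite-direction case returned early + // above, so this normalization is well-defined.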
+ half = simd_normalize(half); + return simd_mul(_simd_quaternion_reduced(from, half), + _simd_quaternion_reduced(half, to)); +} + +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float3x3 matrix) { + const simd_float3 *mat = matrix.columns; + float trace = mat[0][0] + mat[1][1] + mat[2][2]; + if (trace >= 0.0) { + float r = 2*sqrt(1 + trace); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[1][2] - mat[2][1]), + rinv*(mat[2][0] - mat[0][2]), + rinv*(mat[0][1] - mat[1][0]), + r/4); + } else if (mat[0][0] >= mat[1][1] && mat[0][0] >= mat[2][2]) { + float r = 2*sqrt(1 - mat[1][1] - mat[2][2] + mat[0][0]); + float rinv = simd_recip(r); + return simd_quaternion(r/4, + rinv*(mat[0][1] + mat[1][0]), + rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] - mat[2][1])); + } else if (mat[1][1] >= mat[2][2]) { + float r = 2*sqrt(1 - mat[0][0] - mat[2][2] + mat[1][1]); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][1] + mat[1][0]), + r/4, + rinv*(mat[1][2] + mat[2][1]), + rinv*(mat[2][0] - mat[0][2])); + } else { + float r = 2*sqrt(1 - mat[0][0] - mat[1][1] + mat[2][2]); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] + mat[2][1]), + r/4, + rinv*(mat[0][1] - mat[1][0])); + } +} + +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float4x4 matrix) { + const simd_float4 *mat = matrix.columns; + float trace = mat[0][0] + mat[1][1] + mat[2][2]; + if (trace >= 0.0) { + float r = 2*sqrt(1 + trace); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[1][2] - mat[2][1]), + rinv*(mat[2][0] - mat[0][2]), + rinv*(mat[0][1] - mat[1][0]), + r/4); + } else if (mat[0][0] >= mat[1][1] && mat[0][0] >= mat[2][2]) { + float r = 2*sqrt(1 - mat[1][1] - mat[2][2] + mat[0][0]); + float rinv = simd_recip(r); + return simd_quaternion(r/4, + rinv*(mat[0][1] + mat[1][0]), + rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] - mat[2][1])); + } else if (mat[1][1] >= mat[2][2]) { + float r = 2*sqrt(1 - mat[0][0] - mat[2][2] + mat[1][1]); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][1] + mat[1][0]), + r/4, + rinv*(mat[1][2] + mat[2][1]), + rinv*(mat[2][0] - mat[0][2])); + } else { + float r = 2*sqrt(1 - mat[0][0] - mat[1][1] + mat[2][2]); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] + mat[2][1]), + r/4, + rinv*(mat[0][1] - mat[1][0])); + } +} + +/*! @abstract The angle between p and q interpreted as 4-dimensional vectors. + * + * @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE float _simd_angle(simd_quatf p, simd_quatf q) { + return 2*atan2(simd_length(p.vector - q.vector), simd_length(p.vector + q.vector)); +} + +/*! @abstract sin(x)/x. + * + * @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_CFUNC float _simd_sinc(float x) { + if (x == 0) return 1; + return sin(x)/x; +} + +/*! @abstract Spherical lerp between q0 and q1. + * + * @discussion This function may interpolate along either the longer or + * shorter path between q0 and q1; it is used as an implementation detail + * in `simd_slerp` and `simd_slerp_longest`; you should use those functions + * instead of calling this directly.
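+ * Numerically it evaluates the standard slerp formula + * (sin((1-t)a)*q0 + sin(t*a)*q1)/sin(a), where `a` is the angle between + * q0 and q1; writing the weights with sinc keeps them well-defined as + * `a` approaches zero, where the result reduces to a normalized lerp.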
*/ +static SIMD_NOINLINE simd_quatf _simd_slerp_internal(simd_quatf q0, simd_quatf q1, float t) { + float s = 1 - t; + float a = _simd_angle(q0, q1); + float r = simd_recip(_simd_sinc(a)); + return simd_normalize(simd_quaternion(_simd_sinc(s*a)*r*s*q0.vector + _simd_sinc(t*a)*r*t*q1.vector)); +} + +static SIMD_NOINLINE simd_quatf simd_slerp(simd_quatf q0, simd_quatf q1, float t) { + if (simd_dot(q0, q1) >= 0) + return _simd_slerp_internal(q0, q1, t); + return _simd_slerp_internal(q0, simd_negate(q1), t); +} + +static SIMD_NOINLINE simd_quatf simd_slerp_longest(simd_quatf q0, simd_quatf q1, float t) { + if (simd_dot(q0, q1) >= 0) + return _simd_slerp_internal(q0, simd_negate(q1), t); + return _simd_slerp_internal(q0, q1, t); +} + +/*! @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE simd_quatf _simd_intermediate(simd_quatf q0, simd_quatf q1, simd_quatf q2) { + simd_quatf p0 = __tg_log(simd_mul(q0, simd_inverse(q1))); + simd_quatf p2 = __tg_log(simd_mul(q2, simd_inverse(q1))); + return simd_normalize(simd_mul(q1, __tg_exp(simd_mul(-0.25, simd_add(p0,p2))))); +} + +/*! @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE simd_quatf _simd_squad(simd_quatf q0, simd_quatf qa, simd_quatf qb, simd_quatf q1, float t) { + simd_quatf r0 = _simd_slerp_internal(q0, q1, t); + simd_quatf r1 = _simd_slerp_internal(qa, qb, t); + return _simd_slerp_internal(r0, r1, 2*t*(1 - t)); +} + +static SIMD_NOINLINE simd_quatf simd_spline(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t) { + simd_quatf qa = _simd_intermediate(q0, q1, q2); + simd_quatf qb = _simd_intermediate(q1, q2, q3); + return _simd_squad(q1, qa, qb, q2, t); +} + +static SIMD_NOINLINE simd_quatf simd_bezier(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t) { + simd_quatf q01 = _simd_slerp_internal(q0, q1, t); + simd_quatf q12 = _simd_slerp_internal(q1, q2, t); + simd_quatf q23 = _simd_slerp_internal(q2, q3, t); + simd_quatf q012 = _simd_slerp_internal(q01, q12, t); + simd_quatf q123 = _simd_slerp_internal(q12, q23, t); + return _simd_slerp_internal(q012, q123, t); +} + +/* MARK: - C and Objective-C double interfaces */ + +/*! @abstract Constructs a quaternion from four scalar values. + * + * @param ix The first component of the imaginary (vector) part. + * @param iy The second component of the imaginary (vector) part. + * @param iz The third component of the imaginary (vector) part. + * + * @param r The real (scalar) part. */ +static inline SIMD_CFUNC simd_quatd simd_quaternion(double ix, double iy, double iz, double r) { + return (simd_quatd){ { ix, iy, iz, r } }; +} + +/*! @abstract Constructs a quaternion from an array of four scalars. + * + * @discussion Note that the imaginary part of the quaternion comes from + * array elements 0, 1, and 2, and the real part comes from element 3. */ +static inline SIMD_NONCONST simd_quatd simd_quaternion(const double xyzr[4]) { + return (simd_quatd){ *(const simd_packed_double4 *)xyzr }; +} + +/*! @abstract Constructs a quaternion from a four-element vector. + * + * @discussion Note that the imaginary (vector) part of the quaternion comes + * from lanes 0, 1, and 2 of the vector, and the real (scalar) part comes from + * lane 3. 
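+ * For example, simd_quaternion((simd_double4){0, 0, 0, 1}) is the + * identity quaternion.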
*/ +static inline SIMD_CFUNC simd_quatd simd_quaternion(simd_double4 xyzr) { + return (simd_quatd){ xyzr }; +} + +/*! @abstract Constructs a quaternion that rotates by `angle` radians about + * `axis`. */ +static inline SIMD_CFUNC simd_quatd simd_quaternion(double angle, simd_double3 axis); + +/*! @abstract Construct a quaternion that rotates from one vector to another. + * + * @param from A normalized three-element vector. + * @param to A normalized three-element vector. + * + * @discussion The rotation axis is `simd_cross(from, to)`. If `from` and + * `to` point in opposite directions (to within machine precision), an + * arbitrary rotation axis is chosen, and the angle is pi radians. */ +static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double3 from, simd_double3 to); + +/*! @abstract Construct a quaternion from a 3x3 rotation `matrix`. + * + * @discussion If `matrix` is not orthogonal with determinant 1, the result + * is undefined. */ +static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double3x3 matrix); + +/*! @abstract Construct a quaternion from a 4x4 rotation `matrix`. + * + * @discussion The last row and column of the matrix are ignored. This + * function is equivalent to calling simd_quaternion with the upper-left 3x3 + * submatrix. */ +static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double4x4 matrix); + +/*! @abstract The real (scalar) part of the quaternion `q`. */ +static inline SIMD_CFUNC double simd_real(simd_quatd q) { + return q.vector.w; +} + +/*! @abstract The imaginary (vector) part of the quaternion `q`. */ +static inline SIMD_CFUNC simd_double3 simd_imag(simd_quatd q) { + return q.vector.xyz; +} + +/*! @abstract The angle (in radians) of rotation represented by `q`. */ +static inline SIMD_CFUNC double simd_angle(simd_quatd q); + +/*! @abstract The normalized axis (a 3-element vector) around which the + * action of the quaternion `q` rotates. */ +static inline SIMD_CFUNC simd_double3 simd_axis(simd_quatd q); + +/*! @abstract The sum of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatd simd_add(simd_quatd p, simd_quatd q); + +/*! @abstract The difference of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatd simd_sub(simd_quatd p, simd_quatd q); + +/*! @abstract The product of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatd simd_mul(simd_quatd p, simd_quatd q); + +/*! @abstract The quaternion `q` scaled by the real value `a`. */ +static inline SIMD_CFUNC simd_quatd simd_mul(simd_quatd q, double a); + +/*! @abstract The quaternion `q` scaled by the real value `a`. */ +static inline SIMD_CFUNC simd_quatd simd_mul(double a, simd_quatd q); + +/*! @abstract The conjugate of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatd simd_conjugate(simd_quatd q); + +/*! @abstract The (multiplicative) inverse of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatd simd_inverse(simd_quatd q); + +/*! @abstract The negation (additive inverse) of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatd simd_negate(simd_quatd q); + +/*! @abstract The dot product of the quaternions `p` and `q` interpreted as + * four-dimensional vectors. */ +static inline SIMD_CFUNC double simd_dot(simd_quatd p, simd_quatd q); + +/*! @abstract The length of the quaternion `q`. */ +static inline SIMD_CFUNC double simd_length(simd_quatd q); + +/*! @abstract The unit quaternion obtained by normalizing `q`. */ +static inline SIMD_CFUNC simd_quatd simd_normalize(simd_quatd q); + +/*!
+/*! @abstract Rotates the vector `v` by the quaternion `q`. */
+static inline SIMD_CFUNC simd_double3 simd_act(simd_quatd q, simd_double3 v);
+
+/*! @abstract Logarithm of the quaternion `q`.
+ * @discussion Do not call this function directly; use `log(q)` instead.
+ *
+ * We can write a quaternion `q` in the form: `r(cos(t) + sin(t)v)` where
+ * `r` is the length of `q`, `t` is an angle, and `v` is a unit 3-vector.
+ * The logarithm of `q` is `log(r) + tv`, just like the logarithm of the
+ * complex number `r*(cos(t) + i sin(t))` is `log(r) + it`.
+ *
+ * Note that this function is not robust against poorly-scaled non-unit
+ * quaternions, because it is primarily used for spline interpolation of
+ * unit quaternions. If you need to compute a robust logarithm of general
+ * quaternions, you can use the following approach:
+ *
+ *   scale = simd_reduce_max(simd_abs(q.vector));
+ *   logq = log(simd_recip(scale)*q);
+ *   logq.real += log(scale);
+ *   return logq; */
+static SIMD_NOINLINE simd_quatd __tg_log(simd_quatd q);
+
+/*! @abstract Inverse of `log( )`; the exponential map on quaternions.
+ * @discussion Do not call this function directly; use `exp(q)` instead. */
+static SIMD_NOINLINE simd_quatd __tg_exp(simd_quatd q);
+
+/*! @abstract Spherical linear interpolation along the shortest arc between
+ * quaternions `q0` and `q1`. */
+static SIMD_NOINLINE simd_quatd simd_slerp(simd_quatd q0, simd_quatd q1, double t);
+
+/*! @abstract Spherical linear interpolation along the longest arc between
+ * quaternions `q0` and `q1`. */
+static SIMD_NOINLINE simd_quatd simd_slerp_longest(simd_quatd q0, simd_quatd q1, double t);
+
+/*! @abstract Interpolate between quaternions along a spherical cubic spline.
+ *
+ * @discussion The function interpolates between q1 and q2. q0 is the left
+ * endpoint of the previous interval, and q3 is the right endpoint of the next
+ * interval. Use this function to smoothly interpolate between a sequence of
+ * rotations. */
+static SIMD_NOINLINE simd_quatd simd_spline(simd_quatd q0, simd_quatd q1, simd_quatd q2, simd_quatd q3, double t);
+
+/*! @abstract Spherical cubic Bezier interpolation between quaternions.
+ *
+ * @discussion The function treats q0 ... q3 as control points and uses slerp
+ * in place of lerp in the De Casteljau algorithm. The endpoints of
+ * interpolation are thus q0 and q3, and the curve will not generally pass
+ * through q1 or q2. Note that the convex hull property of "standard" Bezier
+ * curves does not hold on the sphere. */
+static SIMD_NOINLINE simd_quatd simd_bezier(simd_quatd q0, simd_quatd q1, simd_quatd q2, simd_quatd q3, double t);
+
+#ifdef __cplusplus
+} /* extern "C" */
+/* MARK: - C++ double interfaces */
+
+namespace simd {
+  struct quatd : ::simd_quatd {
+    /*! @abstract The identity quaternion. */
+    quatd( ) : ::simd_quatd(::simd_quaternion((double4){0,0,0,1})) { }
+
+    /*! @abstract Constructs a C++ quaternion from a C quaternion. */
+    quatd(::simd_quatd q) : ::simd_quatd(q) { }
+
+    /*! @abstract Constructs a quaternion from components. */
+    quatd(double ix, double iy, double iz, double r) : ::simd_quatd(::simd_quaternion(ix, iy, iz, r)) { }
+
+    /*! @abstract Constructs a quaternion from an array of scalars. */
+    quatd(const double xyzr[4]) : ::simd_quatd(::simd_quaternion(xyzr)) { }
+
+    /*! @abstract Constructs a quaternion from a vector. */
+    quatd(double4 xyzr) : ::simd_quatd(::simd_quaternion(xyzr)) { }
+
+    /*! @abstract Quaternion representing rotation about `axis` by `angle`
+     * radians. */
+    quatd(double angle, double3 axis) : ::simd_quatd(::simd_quaternion(angle, axis)) { }
+
+    /*! @abstract Quaternion that rotates `from` into `to`. */
+    quatd(double3 from, double3 to) : ::simd_quatd(::simd_quaternion(from, to)) { }
+
+    /*! @abstract Constructs a quaternion from a rotation matrix. */
+    quatd(::simd_double3x3 matrix) : ::simd_quatd(::simd_quaternion(matrix)) { }
+
+    /*! @abstract Constructs a quaternion from a rotation matrix. */
+    quatd(::simd_double4x4 matrix) : ::simd_quatd(::simd_quaternion(matrix)) { }
+
+    /*! @abstract The real (scalar) part of the quaternion. */
+    double real(void) const { return ::simd_real(*this); }
+
+    /*! @abstract The imaginary (vector) part of the quaternion. */
+    double3 imag(void) const { return ::simd_imag(*this); }
+
+    /*! @abstract The angle the quaternion rotates by. */
+    double angle(void) const { return ::simd_angle(*this); }
+
+    /*! @abstract The axis the quaternion rotates about. */
+    double3 axis(void) const { return ::simd_axis(*this); }
+
+    /*! @abstract The length of the quaternion. */
+    double length(void) const { return ::simd_length(*this); }
+
+    /*! @abstract Act on the vector `v` by rotation. */
+    double3 operator()(const ::simd_double3 v) const { return ::simd_act(*this, v); }
+  };
+
+  static SIMD_CPPFUNC quatd operator+(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_add(p, q); }
+  static SIMD_CPPFUNC quatd operator-(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_sub(p, q); }
+  static SIMD_CPPFUNC quatd operator-(const ::simd_quatd p) { return ::simd_negate(p); }
+  static SIMD_CPPFUNC quatd operator*(const double r, const ::simd_quatd p) { return ::simd_mul(r, p); }
+  static SIMD_CPPFUNC quatd operator*(const ::simd_quatd p, const double r) { return ::simd_mul(p, r); }
+  static SIMD_CPPFUNC quatd operator*(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_mul(p, q); }
+  static SIMD_CPPFUNC quatd operator/(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_mul(p, ::simd_inverse(q)); }
+  static SIMD_CPPFUNC quatd operator+=(quatd &p, const ::simd_quatd q) { return p = p+q; }
+  static SIMD_CPPFUNC quatd operator-=(quatd &p, const ::simd_quatd q) { return p = p-q; }
+  static SIMD_CPPFUNC quatd operator*=(quatd &p, const double r) { return p = p*r; }
+  static SIMD_CPPFUNC quatd operator*=(quatd &p, const ::simd_quatd q) { return p = p*q; }
+  static SIMD_CPPFUNC quatd operator/=(quatd &p, const ::simd_quatd q) { return p = p/q; }
+
+  /*! @abstract The conjugate of the quaternion `q`. */
+  static SIMD_CPPFUNC quatd conjugate(const ::simd_quatd p) { return ::simd_conjugate(p); }
+
+  /*! @abstract The (multiplicative) inverse of the quaternion `q`. */
+  static SIMD_CPPFUNC quatd inverse(const ::simd_quatd p) { return ::simd_inverse(p); }
+
+  /*! @abstract The dot product of the quaternions `p` and `q` interpreted as
+   * four-dimensional vectors. */
+  static SIMD_CPPFUNC double dot(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_dot(p, q); }
+
+  /*! @abstract The unit quaternion obtained by normalizing `q`. */
+  static SIMD_CPPFUNC quatd normalize(const ::simd_quatd p) { return ::simd_normalize(p); }
+
+  /*! @abstract Logarithm of the quaternion `q`. */
+  static SIMD_CPPFUNC quatd log(const ::simd_quatd q) { return ::__tg_log(q); }
+
+  /*! @abstract Exponential map of the quaternion `q`. */
+  static SIMD_CPPFUNC quatd exp(const ::simd_quatd q) { return ::__tg_exp(q); }
+
+  /*! @abstract Spherical linear interpolation along the shortest arc between
+   * quaternions `q0` and `q1`. */
+  static SIMD_CPPFUNC quatd slerp(const ::simd_quatd p0, const ::simd_quatd p1, double t) { return ::simd_slerp(p0, p1, t); }
+
+  /*! @abstract Spherical linear interpolation along the longest arc between
+   * quaternions `q0` and `q1`. */
+  static SIMD_CPPFUNC quatd slerp_longest(const ::simd_quatd p0, const ::simd_quatd p1, double t) { return ::simd_slerp_longest(p0, p1, t); }
+
+  /*! @abstract Interpolate between quaternions along a spherical cubic spline.
+   *
+   * @discussion The function interpolates between q1 and q2. q0 is the left
+   * endpoint of the previous interval, and q3 is the right endpoint of the next
+   * interval. Use this function to smoothly interpolate between a sequence of
+   * rotations. */
+  static SIMD_CPPFUNC quatd spline(const ::simd_quatd p0, const ::simd_quatd p1, const ::simd_quatd p2, const ::simd_quatd p3, double t) { return ::simd_spline(p0, p1, p2, p3, t); }
+
+  /*! @abstract Spherical cubic Bezier interpolation between quaternions.
+   *
+   * @discussion The function treats q0 ... q3 as control points and uses slerp
+   * in place of lerp in the De Casteljau algorithm. The endpoints of
+   * interpolation are thus q0 and q3, and the curve will not generally pass
+   * through q1 or q2. Note that the convex hull property of "standard" Bezier
+   * curves does not hold on the sphere. */
+  static SIMD_CPPFUNC quatd bezier(const ::simd_quatd p0, const ::simd_quatd p1, const ::simd_quatd p2, const ::simd_quatd p3, double t) { return ::simd_bezier(p0, p1, p2, p3, t); }
+}
+
+extern "C" {
+#endif /* __cplusplus */
+
+/* MARK: - double implementations */
+
+#include <simd/math.h>
+#include <simd/geometry.h>
+
+/* tg_promote is implementation gobbledygook that enables the compile-time
+ * dispatching in tgmath.h to work its magic. */
+static simd_quatd __attribute__((__overloadable__)) __tg_promote(simd_quatd);
+
+/*! @abstract Constructs a quaternion from imaginary and real parts.
+ * @discussion This function is hidden behind an underscore to avoid confusion
+ * with the angle-axis constructor. */
+static inline SIMD_CFUNC simd_quatd _simd_quaternion(simd_double3 imag, double real) {
+  return simd_quaternion(simd_make_double4(imag, real));
+}
+
+static inline SIMD_CFUNC simd_quatd simd_quaternion(double angle, simd_double3 axis) {
+  return _simd_quaternion(sin(angle/2) * axis, cos(angle/2));
+}
+
+static inline SIMD_CFUNC double simd_angle(simd_quatd q) {
+  return 2*atan2(simd_length(q.vector.xyz), q.vector.w);
+}
+
+static inline SIMD_CFUNC simd_double3 simd_axis(simd_quatd q) {
+  return simd_normalize(q.vector.xyz);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_add(simd_quatd p, simd_quatd q) {
+  return simd_quaternion(p.vector + q.vector);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_sub(simd_quatd p, simd_quatd q) {
+  return simd_quaternion(p.vector - q.vector);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_mul(simd_quatd p, simd_quatd q) {
+  #pragma STDC FP_CONTRACT ON
+  return simd_quaternion((p.vector.x * __builtin_shufflevector(q.vector, -q.vector, 3,6,1,4) +
+                          p.vector.y * __builtin_shufflevector(q.vector, -q.vector, 2,3,4,5)) +
+                         (p.vector.z * __builtin_shufflevector(q.vector, -q.vector, 5,0,3,6) +
+                          p.vector.w * q.vector));
+}
+
+static inline SIMD_CFUNC simd_quatd simd_mul(simd_quatd q, double a) {
+  return simd_quaternion(a * q.vector);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_mul(double a, simd_quatd q) {
+  return simd_mul(q,a);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_conjugate(simd_quatd q) {
+  return simd_quaternion(q.vector * (simd_double4){-1,-1,-1, 1});
+}
+
+static inline SIMD_CFUNC simd_quatd simd_inverse(simd_quatd q) {
+  return simd_quaternion(simd_conjugate(q).vector * simd_recip(simd_length_squared(q.vector)));
+}
+
+static inline SIMD_CFUNC simd_quatd simd_negate(simd_quatd q) {
+  return simd_quaternion(-q.vector);
+}
+
+static inline SIMD_CFUNC double simd_dot(simd_quatd p, simd_quatd q) {
+  return simd_dot(p.vector, q.vector);
+}
+
+static inline SIMD_CFUNC double simd_length(simd_quatd q) {
+  return simd_length(q.vector);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_normalize(simd_quatd q) {
+  double length_squared = simd_length_squared(q.vector);
+  if (length_squared == 0) {
+    return simd_quaternion((simd_double4){0,0,0,1});
+  }
+  return simd_quaternion(q.vector * simd_rsqrt(length_squared));
+}
+
+#if defined __arm__ || defined __arm64__
+/*! @abstract Multiplies the vector `v` by the quaternion `q`.
+ *
+ * @discussion This IS NOT the action of `q` on `v` (i.e. this is not rotation
+ * by `q`). That operation is provided by `simd_act(q, v)`. This function is an
+ * implementation detail and you should not call it directly. It may be
+ * removed or modified in future versions of the simd module. */
+static inline SIMD_CFUNC simd_quatd _simd_mul_vq(simd_double3 v, simd_quatd q) {
+  #pragma STDC FP_CONTRACT ON
+  return simd_quaternion(v.x * __builtin_shufflevector(q.vector, -q.vector, 3,6,1,4) +
+                         v.y * __builtin_shufflevector(q.vector, -q.vector, 2,3,4,5) +
+                         v.z * __builtin_shufflevector(q.vector, -q.vector, 5,0,3,6));
+}
+#endif
+
+static inline SIMD_CFUNC simd_double3 simd_act(simd_quatd q, simd_double3 v) {
+#if defined __arm__ || defined __arm64__
+  return simd_mul(q, _simd_mul_vq(v, simd_conjugate(q))).vector.xyz;
+#else
+  #pragma STDC FP_CONTRACT ON
+  simd_double3 t = 2*simd_cross(simd_imag(q),v);
+  return v + simd_real(q)*t + simd_cross(simd_imag(q), t);
+#endif
+}
+
+static SIMD_NOINLINE simd_quatd __tg_log(simd_quatd q) {
+  double real = __tg_log(simd_length_squared(q.vector))/2;
+  if (simd_equal(simd_imag(q), 0)) return _simd_quaternion(0, real);
+  simd_double3 imag = __tg_acos(simd_real(q)/simd_length(q)) * simd_normalize(simd_imag(q));
+  return _simd_quaternion(imag, real);
+}
+
+static SIMD_NOINLINE simd_quatd __tg_exp(simd_quatd q) {
+  // angle is actually *twice* the angle of the rotation corresponding to
+  // the resulting quaternion, which is why we don't simply use the (angle,
+  // axis) constructor to generate `unit`.
+  double angle = simd_length(simd_imag(q));
+  if (angle == 0) return _simd_quaternion(0, exp(simd_real(q)));
+  simd_double3 axis = simd_normalize(simd_imag(q));
+  simd_quatd unit = _simd_quaternion(sin(angle)*axis, cos(angle));
+  return simd_mul(exp(simd_real(q)), unit);
+}
+
+/*! @abstract Implementation detail of the `simd_quaternion(from, to)`
+ * initializer.
+ *
+ * @discussion Computes the quaternion rotation `from` to `to` if they are
+ * separated by less than 90 degrees. Not numerically stable for larger
+ * angles. This function is an implementation detail and you should not
+ * call it directly. It may be removed or modified in future versions of the
+ * simd module. */
+static inline SIMD_CFUNC simd_quatd _simd_quaternion_reduced(simd_double3 from, simd_double3 to) {
+  simd_double3 half = simd_normalize(from + to);
+  return _simd_quaternion(simd_cross(from, half), simd_dot(from, half));
+}
+
+static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double3 from, simd_double3 to) {
+
+  // If the angle between from and to is not too big, we can compute the
+  // rotation accurately using a simple implementation.
+  if (simd_dot(from, to) >= 0) {
+    return _simd_quaternion_reduced(from, to);
+  }
+
+  // Because from and to are more than 90 degrees apart, we compute the
+  // rotation in two stages (from -> half), (half -> to) to preserve numerical
+  // accuracy.
+  simd_double3 half = from + to;
+
+  if (simd_length_squared(half) == 0) {
+    // half is nearly zero, so from and to point in nearly opposite directions
+    // and the rotation is numerically underspecified. Pick an axis orthogonal
+    // to the vectors, and use an angle of pi radians.
+    simd_double3 abs_from = simd_abs(from);
+    if (abs_from.x <= abs_from.y && abs_from.x <= abs_from.z)
+      return _simd_quaternion(simd_normalize(simd_cross(from, (simd_double3){1,0,0})), 0.f);
+    else if (abs_from.y <= abs_from.z)
+      return _simd_quaternion(simd_normalize(simd_cross(from, (simd_double3){0,1,0})), 0.f);
+    else
+      return _simd_quaternion(simd_normalize(simd_cross(from, (simd_double3){0,0,1})), 0.f);
+  }
+
+  // Compute the two-step rotation.
+  half = simd_normalize(half);
+  return simd_mul(_simd_quaternion_reduced(from, half),
+                  _simd_quaternion_reduced(half, to));
+}
+
+static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double3x3 matrix) {
+  const simd_double3 *mat = matrix.columns;
+  double trace = mat[0][0] + mat[1][1] + mat[2][2];
+  if (trace >= 0.0) {
+    double r = 2*sqrt(1 + trace);
+    double rinv = simd_recip(r);
+    return simd_quaternion(rinv*(mat[1][2] - mat[2][1]),
+                           rinv*(mat[2][0] - mat[0][2]),
+                           rinv*(mat[0][1] - mat[1][0]),
+                           r/4);
+  } else if (mat[0][0] >= mat[1][1] && mat[0][0] >= mat[2][2]) {
+    double r = 2*sqrt(1 - mat[1][1] - mat[2][2] + mat[0][0]);
+    double rinv = simd_recip(r);
+    return simd_quaternion(r/4,
+                           rinv*(mat[0][1] + mat[1][0]),
+                           rinv*(mat[0][2] + mat[2][0]),
+                           rinv*(mat[1][2] - mat[2][1]));
+  } else if (mat[1][1] >= mat[2][2]) {
+    double r = 2*sqrt(1 - mat[0][0] - mat[2][2] + mat[1][1]);
+    double rinv = simd_recip(r);
+    return simd_quaternion(rinv*(mat[0][1] + mat[1][0]),
+                           r/4,
+                           rinv*(mat[1][2] + mat[2][1]),
+                           rinv*(mat[2][0] - mat[0][2]));
+  } else {
+    double r = 2*sqrt(1 - mat[0][0] - mat[1][1] + mat[2][2]);
+    double rinv = simd_recip(r);
+    return simd_quaternion(rinv*(mat[0][2] + mat[2][0]),
+                           rinv*(mat[1][2] + mat[2][1]),
+                           r/4,
+                           rinv*(mat[0][1] - mat[1][0]));
+  }
+}
+
+static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double4x4 matrix) {
+  const simd_double4 *mat = matrix.columns;
+  double trace = mat[0][0] + mat[1][1] + mat[2][2];
+  if (trace >= 0.0) {
+    double r = 2*sqrt(1 + trace);
+    double rinv = simd_recip(r);
+    return simd_quaternion(rinv*(mat[1][2] - mat[2][1]),
+                           rinv*(mat[2][0] - mat[0][2]),
+                           rinv*(mat[0][1] - mat[1][0]),
+                           r/4);
+  } else if (mat[0][0] >= mat[1][1] && mat[0][0] >= mat[2][2]) {
+    double r = 2*sqrt(1 - mat[1][1] - mat[2][2] + mat[0][0]);
+    double rinv = simd_recip(r);
+    return simd_quaternion(r/4,
+                           rinv*(mat[0][1] + mat[1][0]),
+                           rinv*(mat[0][2] + mat[2][0]),
+                           rinv*(mat[1][2] - mat[2][1]));
+  } else if (mat[1][1] >= mat[2][2]) {
+    double r = 2*sqrt(1 - mat[0][0] - mat[2][2] + mat[1][1]);
+    double rinv = simd_recip(r);
+    return simd_quaternion(rinv*(mat[0][1] + mat[1][0]),
+                           r/4,
+                           rinv*(mat[1][2] + mat[2][1]),
+                           rinv*(mat[2][0] - mat[0][2]));
+  } else {
+    double r = 2*sqrt(1 - mat[0][0] - mat[1][1] + mat[2][2]);
+    double rinv = simd_recip(r);
+    return simd_quaternion(rinv*(mat[0][2] + mat[2][0]),
+                           rinv*(mat[1][2] + mat[2][1]),
+                           r/4,
+                           rinv*(mat[0][1] - mat[1][0]));
+  }
+}
+
+/*! @abstract The angle between p and q interpreted as 4-dimensional vectors.
+ *
+ * @discussion This function is an implementation detail and you should not
+ * call it directly. It may be removed or modified in future versions of the
+ * simd module. */
+static SIMD_NOINLINE double _simd_angle(simd_quatd p, simd_quatd q) {
+  return 2*atan2(simd_length(p.vector - q.vector), simd_length(p.vector + q.vector));
+}
+
+/*! @abstract sin(x)/x.
+ *
+ * @discussion This function is an implementation detail and you should not
+ * call it directly. It may be removed or modified in future versions of the
+ * simd module. */
+static SIMD_CFUNC double _simd_sinc(double x) {
+  if (x == 0) return 1;
+  return sin(x)/x;
+}
+
+/*! @abstract Spherical lerp between q0 and q1.
+ *
+ * @discussion This function may interpolate along either the longer or
+ * shorter path between q0 and q1; it is used as an implementation detail
+ * in `simd_slerp` and `simd_slerp_longest`; you should use those functions
+ * instead of calling this directly.
*/ +static SIMD_NOINLINE simd_quatd _simd_slerp_internal(simd_quatd q0, simd_quatd q1, double t) { + double s = 1 - t; + double a = _simd_angle(q0, q1); + double r = simd_recip(_simd_sinc(a)); + return simd_normalize(simd_quaternion(_simd_sinc(s*a)*r*s*q0.vector + _simd_sinc(t*a)*r*t*q1.vector)); +} + +static SIMD_NOINLINE simd_quatd simd_slerp(simd_quatd q0, simd_quatd q1, double t) { + if (simd_dot(q0, q1) >= 0) + return _simd_slerp_internal(q0, q1, t); + return _simd_slerp_internal(q0, simd_negate(q1), t); +} + +static SIMD_NOINLINE simd_quatd simd_slerp_longest(simd_quatd q0, simd_quatd q1, double t) { + if (simd_dot(q0, q1) >= 0) + return _simd_slerp_internal(q0, simd_negate(q1), t); + return _simd_slerp_internal(q0, q1, t); +} + +/*! @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE simd_quatd _simd_intermediate(simd_quatd q0, simd_quatd q1, simd_quatd q2) { + simd_quatd p0 = __tg_log(simd_mul(q0, simd_inverse(q1))); + simd_quatd p2 = __tg_log(simd_mul(q2, simd_inverse(q1))); + return simd_normalize(simd_mul(q1, __tg_exp(simd_mul(-0.25, simd_add(p0,p2))))); +} + +/*! @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE simd_quatd _simd_squad(simd_quatd q0, simd_quatd qa, simd_quatd qb, simd_quatd q1, double t) { + simd_quatd r0 = _simd_slerp_internal(q0, q1, t); + simd_quatd r1 = _simd_slerp_internal(qa, qb, t); + return _simd_slerp_internal(r0, r1, 2*t*(1 - t)); +} + +static SIMD_NOINLINE simd_quatd simd_spline(simd_quatd q0, simd_quatd q1, simd_quatd q2, simd_quatd q3, double t) { + simd_quatd qa = _simd_intermediate(q0, q1, q2); + simd_quatd qb = _simd_intermediate(q1, q2, q3); + return _simd_squad(q1, qa, qb, q2, t); +} + +static SIMD_NOINLINE simd_quatd simd_bezier(simd_quatd q0, simd_quatd q1, simd_quatd q2, simd_quatd q3, double t) { + simd_quatd q01 = _simd_slerp_internal(q0, q1, t); + simd_quatd q12 = _simd_slerp_internal(q1, q2, t); + simd_quatd q23 = _simd_slerp_internal(q2, q3, t); + simd_quatd q012 = _simd_slerp_internal(q01, q12, t); + simd_quatd q123 = _simd_slerp_internal(q12, q23, t); + return _simd_slerp_internal(q012, q123, t); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ +#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif /* SIMD_QUATERNIONS */
\ No newline at end of file
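
As a quick orientation for the API vendored above, here is a minimal usage sketch. It is not part of the header; it assumes an Apple toolchain where <simd/simd.h> is available (the overloaded C names such as simd_quaternion rely on clang's overloadable-function extension), and the printed values are approximate.

  #include <simd/simd.h>
  #include <math.h>
  #include <stdio.h>

  int main(void) {
    // A rotation of pi/2 radians about the z axis, via the
    // (angle, axis) constructor declared above.
    simd_quatd qz = simd_quaternion(M_PI_2, (simd_double3){0, 0, 1});

    // simd_act rotates a vector: the x axis should map (approximately)
    // to the y axis. Internally this is the sandwich product q v q^-1,
    // computed via the cross-product formula on non-ARM targets.
    simd_double3 v = simd_act(qz, (simd_double3){1, 0, 0});
    printf("%f %f %f\n", v.x, v.y, v.z);          // ~ 0 1 0

    // Halfway along the shortest arc from the identity to qz is a
    // pi/4 rotation about the same axis.
    simd_quatd id  = simd_quaternion((simd_double4){0, 0, 0, 1});
    simd_quatd mid = simd_slerp(id, qz, 0.5);
    printf("%f\n", simd_angle(mid));              // ~ 0.785398
    return 0;
  }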

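One detail of _simd_slerp_internal (both the float and double versions above) deserves a note: with r = simd_recip(_simd_sinc(a)), the coefficient _simd_sinc(s*a)*r*s is algebraically sin(s*a)/sin(a), the textbook slerp weight, but unlike the textbook form it stays well-defined as the angle a between the quaternions tends to zero. A scalar sketch for intuition (the helper names here are illustrative, not part of the header):

  #include <math.h>
  #include <stdio.h>

  // sin(x)/x, continuously extended through x == 0, mirroring _simd_sinc.
  static double sinc(double x) { return x == 0 ? 1 : sin(x)/x; }

  // Weight for the q0 term, written the way _simd_slerp_internal computes it:
  // sinc(s*a)/sinc(a) * s == (sin(s*a)/(s*a)) * (a/sin(a)) * s == sin(s*a)/sin(a).
  static double weight_sinc(double a, double s) { return sinc(s*a)/sinc(a)*s; }

  // Textbook form, singular at a == 0.
  static double weight_naive(double a, double s) { return sin(s*a)/sin(a); }

  int main(void) {
    double a = 1.0, t = 0.25, s = 1 - t;
    printf("%.17g %.17g\n", weight_sinc(a, s), weight_naive(a, s)); // agree
    printf("%.17g\n", weight_sinc(0.0, s));   // limit value s, no NaN
    return 0;
  }

The simd_normalize at the end of _simd_slerp_internal then cleans up any residual rounding error in the weighted sum.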