diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2021-08-15 18:00:10 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2021-08-15 18:00:10 -0700 |
| commit | 21606339af2712d94bb3cfdcc9050287c5a2134c (patch) | |
| tree | d5df6035a82eb191bf6f2d92518c5250d31833d5 /lib/include/opencl-c.h | |
| parent | 78ff2a148a707f041ab5e5cfdbb5f854bc66270e (diff) | |
| download | zig-21606339af2712d94bb3cfdcc9050287c5a2134c.tar.gz zig-21606339af2712d94bb3cfdcc9050287c5a2134c.zip | |
update C header files to clang 13 rc1
Diffstat (limited to 'lib/include/opencl-c.h')
| -rw-r--r-- | lib/include/opencl-c.h | 860 |
1 files changed, 505 insertions, 355 deletions
diff --git a/lib/include/opencl-c.h b/lib/include/opencl-c.h index ab665628c8..fc50dd718c 100644 --- a/lib/include/opencl-c.h +++ b/lib/include/opencl-c.h @@ -23,10 +23,11 @@ #endif //cl_khr_3d_image_writes #endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0 -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) + +#if (defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && defined(__SPIR__) #pragma OPENCL EXTENSION cl_intel_planar_yuv : begin #pragma OPENCL EXTENSION cl_intel_planar_yuv : end -#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) +#endif // (defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && defined(__SPIR__) #define __ovld __attribute__((overloadable)) #define __conv __attribute__((convergent)) @@ -6339,101 +6340,6 @@ half16 __ovld __cnfn convert_half16_rtz(double16); #endif // cl_khr_fp16 -/** - * OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators - * Reinterprets a data type as another data type of the same size - */ -#define as_char(x) __builtin_astype((x), char) -#define as_char2(x) __builtin_astype((x), char2) -#define as_char3(x) __builtin_astype((x), char3) -#define as_char4(x) __builtin_astype((x), char4) -#define as_char8(x) __builtin_astype((x), char8) -#define as_char16(x) __builtin_astype((x), char16) - -#define as_uchar(x) __builtin_astype((x), uchar) -#define as_uchar2(x) __builtin_astype((x), uchar2) -#define as_uchar3(x) __builtin_astype((x), uchar3) -#define as_uchar4(x) __builtin_astype((x), uchar4) -#define as_uchar8(x) __builtin_astype((x), uchar8) -#define as_uchar16(x) __builtin_astype((x), uchar16) - -#define as_short(x) __builtin_astype((x), short) -#define as_short2(x) __builtin_astype((x), short2) -#define as_short3(x) __builtin_astype((x), short3) -#define as_short4(x) __builtin_astype((x), short4) -#define as_short8(x) __builtin_astype((x), short8) -#define as_short16(x) __builtin_astype((x), short16) - -#define as_ushort(x) __builtin_astype((x), ushort) -#define as_ushort2(x) __builtin_astype((x), ushort2) -#define as_ushort3(x) __builtin_astype((x), ushort3) -#define as_ushort4(x) __builtin_astype((x), ushort4) -#define as_ushort8(x) __builtin_astype((x), ushort8) -#define as_ushort16(x) __builtin_astype((x), ushort16) - -#define as_int(x) __builtin_astype((x), int) -#define as_int2(x) __builtin_astype((x), int2) -#define as_int3(x) __builtin_astype((x), int3) -#define as_int4(x) __builtin_astype((x), int4) -#define as_int8(x) __builtin_astype((x), int8) -#define as_int16(x) __builtin_astype((x), int16) - -#define as_uint(x) __builtin_astype((x), uint) -#define as_uint2(x) __builtin_astype((x), uint2) -#define as_uint3(x) __builtin_astype((x), uint3) -#define as_uint4(x) __builtin_astype((x), uint4) -#define as_uint8(x) __builtin_astype((x), uint8) -#define as_uint16(x) __builtin_astype((x), uint16) - -#define as_long(x) __builtin_astype((x), long) -#define as_long2(x) __builtin_astype((x), long2) -#define as_long3(x) __builtin_astype((x), long3) -#define as_long4(x) __builtin_astype((x), long4) -#define as_long8(x) __builtin_astype((x), long8) -#define as_long16(x) __builtin_astype((x), long16) - -#define as_ulong(x) __builtin_astype((x), ulong) -#define as_ulong2(x) __builtin_astype((x), ulong2) -#define as_ulong3(x) __builtin_astype((x), ulong3) -#define as_ulong4(x) __builtin_astype((x), ulong4) -#define as_ulong8(x) __builtin_astype((x), ulong8) -#define as_ulong16(x) __builtin_astype((x), ulong16) - -#define as_float(x) __builtin_astype((x), float) -#define as_float2(x) __builtin_astype((x), float2) -#define as_float3(x) __builtin_astype((x), float3) -#define as_float4(x) __builtin_astype((x), float4) -#define as_float8(x) __builtin_astype((x), float8) -#define as_float16(x) __builtin_astype((x), float16) - -#ifdef cl_khr_fp64 -#define as_double(x) __builtin_astype((x), double) -#define as_double2(x) __builtin_astype((x), double2) -#define as_double3(x) __builtin_astype((x), double3) -#define as_double4(x) __builtin_astype((x), double4) -#define as_double8(x) __builtin_astype((x), double8) -#define as_double16(x) __builtin_astype((x), double16) -#endif //cl_khr_fp64 - -#ifdef cl_khr_fp16 -#define as_half(x) __builtin_astype((x), half) -#define as_half2(x) __builtin_astype((x), half2) -#define as_half3(x) __builtin_astype((x), half3) -#define as_half4(x) __builtin_astype((x), half4) -#define as_half8(x) __builtin_astype((x), half8) -#define as_half16(x) __builtin_astype((x), half16) -#endif //cl_khr_fp16 - -// OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers - -#define __kernel_exec(X, typen) __kernel \ - __attribute__((work_group_size_hint(X, 1, 1))) \ - __attribute__((vec_type_hint(typen))) - -#define kernel_exec(X, typen) __kernel \ - __attribute__((work_group_size_hint(X, 1, 1))) \ - __attribute__((vec_type_hint(typen))) - // OpenCL v1.1 s6.11.1, v1.2 s6.12.1, v2.0 s6.13.1 - Work-item Functions /** @@ -6494,8 +6400,7 @@ size_t __ovld __cnfn get_local_id(uint dimindx); * Returns the number of work-groups that will execute a * kernel for dimension identified by dimindx. * Valid values of dimindx are 0 to get_work_dim() - 1. - * For other values of dimindx, get_num_groups () returns - * 1. + * For other values of dimindx, get_num_groups() returns 1. * For clEnqueueTask, this always returns 1. */ size_t __ovld __cnfn get_num_groups(uint dimindx); @@ -7354,7 +7259,7 @@ half16 __ovld __cnfn fmod(half16 x, half16 y); * Returns fmin(x - floor (x), 0x1.fffffep-1f ). * floor(x) is returned in iptr. */ -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) float __ovld fract(float x, float *iptr); float2 __ovld fract(float2 x, float2 *iptr); float3 __ovld fract(float3 x, float3 *iptr); @@ -7436,7 +7341,7 @@ half4 __ovld fract(half4 x, __private half4 *iptr); half8 __ovld fract(half8 x, __private half8 *iptr); half16 __ovld fract(half16 x, __private half16 *iptr); #endif //cl_khr_fp16 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * Extract mantissa and exponent from x. For each @@ -7444,7 +7349,7 @@ half16 __ovld fract(half16 x, __private half16 *iptr); * magnitude in the interval [1/2, 1) or 0. Each * component of x equals mantissa returned * 2^exp. */ -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) float __ovld frexp(float x, int *exp); float2 __ovld frexp(float2 x, int2 *exp); float3 __ovld frexp(float3 x, int3 *exp); @@ -7526,7 +7431,7 @@ half4 __ovld frexp(half4 x, __private int4 *exp); half8 __ovld frexp(half8 x, __private int8 *exp); half16 __ovld frexp(half16 x, __private int16 *exp); #endif //cl_khr_fp16 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * Compute the value of the square root of x^2 + y^2 @@ -7651,7 +7556,7 @@ half8 __ovld __cnfn lgamma(half8 x); half16 __ovld __cnfn lgamma(half16 x); #endif //cl_khr_fp16 -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) float __ovld lgamma_r(float x, int *signp); float2 __ovld lgamma_r(float2 x, int2 *signp); float3 __ovld lgamma_r(float3 x, int3 *signp); @@ -7733,7 +7638,7 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp); half8 __ovld lgamma_r(half8 x, __private int8 *signp); half16 __ovld lgamma_r(half16 x, __private int16 *signp); #endif //cl_khr_fp16 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * Compute natural logarithm. @@ -7957,7 +7862,7 @@ half16 __ovld __cnfn minmag(half16 x, half16 y); * the argument. It stores the integral part in the object * pointed to by iptr. */ -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) float __ovld modf(float x, float *iptr); float2 __ovld modf(float2 x, float2 *iptr); float3 __ovld modf(float3 x, float3 *iptr); @@ -8039,7 +7944,7 @@ half4 __ovld modf(half4 x, __private half4 *iptr); half8 __ovld modf(half8 x, __private half8 *iptr); half16 __ovld modf(half16 x, __private half16 *iptr); #endif //cl_khr_fp16 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * Returns a quiet NaN. The nancode may be placed @@ -8217,7 +8122,7 @@ half16 __ovld __cnfn remainder(half16 x, half16 y); * sign as x/y. It stores this signed value in the object * pointed to by quo. */ -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) float __ovld remquo(float x, float y, int *quo); float2 __ovld remquo(float2 x, float2 y, int2 *quo); float3 __ovld remquo(float3 x, float3 y, int3 *quo); @@ -8300,7 +8205,7 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo); half8 __ovld remquo(half8 x, half8 y, __private int8 *quo); half16 __ovld remquo(half16 x, half16 y, __private int16 *quo); #endif //cl_khr_fp16 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * Round to integral value (using round to nearest * even rounding mode) in floating-point format. @@ -8441,7 +8346,7 @@ half16 __ovld __cnfn sin(half16); * is the return value and computed cosine is returned * in cosval. */ -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) float __ovld sincos(float x, float *cosval); float2 __ovld sincos(float2 x, float2 *cosval); float3 __ovld sincos(float3 x, float3 *cosval); @@ -8523,7 +8428,7 @@ half4 __ovld sincos(half4 x, __private half4 *cosval); half8 __ovld sincos(half8 x, __private half8 *cosval); half16 __ovld sincos(half16 x, __private half16 *cosval); #endif //cl_khr_fp16 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * Compute hyperbolic sine. @@ -9449,54 +9354,54 @@ ulong16 __ovld __cnfn clz(ulong16 x); * component type of x, if x is a vector. */ #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -char __ovld ctz(char x); -uchar __ovld ctz(uchar x); -char2 __ovld ctz(char2 x); -uchar2 __ovld ctz(uchar2 x); -char3 __ovld ctz(char3 x); -uchar3 __ovld ctz(uchar3 x); -char4 __ovld ctz(char4 x); -uchar4 __ovld ctz(uchar4 x); -char8 __ovld ctz(char8 x); -uchar8 __ovld ctz(uchar8 x); -char16 __ovld ctz(char16 x); -uchar16 __ovld ctz(uchar16 x); -short __ovld ctz(short x); -ushort __ovld ctz(ushort x); -short2 __ovld ctz(short2 x); -ushort2 __ovld ctz(ushort2 x); -short3 __ovld ctz(short3 x); -ushort3 __ovld ctz(ushort3 x); -short4 __ovld ctz(short4 x); -ushort4 __ovld ctz(ushort4 x); -short8 __ovld ctz(short8 x); -ushort8 __ovld ctz(ushort8 x); -short16 __ovld ctz(short16 x); -ushort16 __ovld ctz(ushort16 x); -int __ovld ctz(int x); -uint __ovld ctz(uint x); -int2 __ovld ctz(int2 x); -uint2 __ovld ctz(uint2 x); -int3 __ovld ctz(int3 x); -uint3 __ovld ctz(uint3 x); -int4 __ovld ctz(int4 x); -uint4 __ovld ctz(uint4 x); -int8 __ovld ctz(int8 x); -uint8 __ovld ctz(uint8 x); -int16 __ovld ctz(int16 x); -uint16 __ovld ctz(uint16 x); -long __ovld ctz(long x); -ulong __ovld ctz(ulong x); -long2 __ovld ctz(long2 x); -ulong2 __ovld ctz(ulong2 x); -long3 __ovld ctz(long3 x); -ulong3 __ovld ctz(ulong3 x); -long4 __ovld ctz(long4 x); -ulong4 __ovld ctz(ulong4 x); -long8 __ovld ctz(long8 x); -ulong8 __ovld ctz(ulong8 x); -long16 __ovld ctz(long16 x); -ulong16 __ovld ctz(ulong16 x); +char __ovld __cnfn ctz(char x); +uchar __ovld __cnfn ctz(uchar x); +char2 __ovld __cnfn ctz(char2 x); +uchar2 __ovld __cnfn ctz(uchar2 x); +char3 __ovld __cnfn ctz(char3 x); +uchar3 __ovld __cnfn ctz(uchar3 x); +char4 __ovld __cnfn ctz(char4 x); +uchar4 __ovld __cnfn ctz(uchar4 x); +char8 __ovld __cnfn ctz(char8 x); +uchar8 __ovld __cnfn ctz(uchar8 x); +char16 __ovld __cnfn ctz(char16 x); +uchar16 __ovld __cnfn ctz(uchar16 x); +short __ovld __cnfn ctz(short x); +ushort __ovld __cnfn ctz(ushort x); +short2 __ovld __cnfn ctz(short2 x); +ushort2 __ovld __cnfn ctz(ushort2 x); +short3 __ovld __cnfn ctz(short3 x); +ushort3 __ovld __cnfn ctz(ushort3 x); +short4 __ovld __cnfn ctz(short4 x); +ushort4 __ovld __cnfn ctz(ushort4 x); +short8 __ovld __cnfn ctz(short8 x); +ushort8 __ovld __cnfn ctz(ushort8 x); +short16 __ovld __cnfn ctz(short16 x); +ushort16 __ovld __cnfn ctz(ushort16 x); +int __ovld __cnfn ctz(int x); +uint __ovld __cnfn ctz(uint x); +int2 __ovld __cnfn ctz(int2 x); +uint2 __ovld __cnfn ctz(uint2 x); +int3 __ovld __cnfn ctz(int3 x); +uint3 __ovld __cnfn ctz(uint3 x); +int4 __ovld __cnfn ctz(int4 x); +uint4 __ovld __cnfn ctz(uint4 x); +int8 __ovld __cnfn ctz(int8 x); +uint8 __ovld __cnfn ctz(uint8 x); +int16 __ovld __cnfn ctz(int16 x); +uint16 __ovld __cnfn ctz(uint16 x); +long __ovld __cnfn ctz(long x); +ulong __ovld __cnfn ctz(ulong x); +long2 __ovld __cnfn ctz(long2 x); +ulong2 __ovld __cnfn ctz(ulong2 x); +long3 __ovld __cnfn ctz(long3 x); +ulong3 __ovld __cnfn ctz(ulong3 x); +long4 __ovld __cnfn ctz(long4 x); +ulong4 __ovld __cnfn ctz(ulong4 x); +long8 __ovld __cnfn ctz(long8 x); +ulong8 __ovld __cnfn ctz(ulong8 x); +long16 __ovld __cnfn ctz(long16 x); +ulong16 __ovld __cnfn ctz(ulong16 x); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** @@ -10002,6 +9907,7 @@ ulong16 __ovld __cnfn upsample(uint16 hi, uint16 lo); /* * popcount(x): returns the number of set bit in x */ +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) char __ovld __cnfn popcount(char x); uchar __ovld __cnfn popcount(uchar x); char2 __ovld __cnfn popcount(char2 x); @@ -10050,6 +9956,7 @@ long8 __ovld __cnfn popcount(long8 x); ulong8 __ovld __cnfn popcount(ulong8 x); long16 __ovld __cnfn popcount(long16 x); ulong16 __ovld __cnfn popcount(ulong16 x); +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) /** * Multiply two 24-bit integer values x and y and add @@ -11342,7 +11249,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p); half16 __ovld vload16(size_t offset, const __constant half *p); #endif //cl_khr_fp16 -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) char2 __ovld vload2(size_t offset, const char *p); uchar2 __ovld vload2(size_t offset, const uchar *p); short2 __ovld vload2(size_t offset, const short *p); @@ -11580,9 +11487,9 @@ half4 __ovld vload4(size_t offset, const __private half *p); half8 __ovld vload8(size_t offset, const __private half *p); half16 __ovld vload16(size_t offset, const __private half *p); #endif //cl_khr_fp16 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) void __ovld vstore2(char2 data, size_t offset, char *p); void __ovld vstore2(uchar2 data, size_t offset, uchar *p); void __ovld vstore2(short2 data, size_t offset, short *p); @@ -11816,7 +11723,7 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p); void __ovld vstore8(half8 data, size_t offset, __private half *p); void __ovld vstore16(half16 data, size_t offset, __private half *p); #endif //cl_khr_fp16 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * Read sizeof (half) bytes of data from address @@ -11827,13 +11734,13 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p); * must be 16-bit aligned. */ float __ovld vload_half(size_t offset, const __constant half *p); -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) float __ovld vload_half(size_t offset, const half *p); #else float __ovld vload_half(size_t offset, const __global half *p); float __ovld vload_half(size_t offset, const __local half *p); float __ovld vload_half(size_t offset, const __private half *p); -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * Read sizeof (halfn) bytes of data from address @@ -11848,7 +11755,7 @@ float3 __ovld vload_half3(size_t offset, const __constant half *p); float4 __ovld vload_half4(size_t offset, const __constant half *p); float8 __ovld vload_half8(size_t offset, const __constant half *p); float16 __ovld vload_half16(size_t offset, const __constant half *p); -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) float2 __ovld vload_half2(size_t offset, const half *p); float3 __ovld vload_half3(size_t offset, const half *p); float4 __ovld vload_half4(size_t offset, const half *p); @@ -11870,7 +11777,7 @@ float3 __ovld vload_half3(size_t offset, const __private half *p); float4 __ovld vload_half4(size_t offset, const __private half *p); float8 __ovld vload_half8(size_t offset, const __private half *p); float16 __ovld vload_half16(size_t offset, const __private half *p); -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * The float value given by data is first @@ -11883,7 +11790,7 @@ float16 __ovld vload_half16(size_t offset, const __private half *p); * The default current rounding mode is round to * nearest even. */ -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) void __ovld vstore_half(float data, size_t offset, half *p); void __ovld vstore_half_rte(float data, size_t offset, half *p); void __ovld vstore_half_rtz(float data, size_t offset, half *p); @@ -11929,7 +11836,7 @@ void __ovld vstore_half_rtz(double data, size_t offset, __private half *p); void __ovld vstore_half_rtp(double data, size_t offset, __private half *p); void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); #endif //cl_khr_fp64 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * The floatn value given by data is converted to @@ -11942,7 +11849,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); * The default current rounding mode is round to * nearest even. */ -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) void __ovld vstore_half2(float2 data, size_t offset, half *p); void __ovld vstore_half3(float3 data, size_t offset, half *p); void __ovld vstore_half4(float4 data, size_t offset, half *p); @@ -12148,7 +12055,7 @@ void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p); void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p); void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); #endif //cl_khr_fp64 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) @@ -12169,7 +12076,7 @@ float3 __ovld vloada_half3(size_t offset, const __constant half *p); float4 __ovld vloada_half4(size_t offset, const __constant half *p); float8 __ovld vloada_half8(size_t offset, const __constant half *p); float16 __ovld vloada_half16(size_t offset, const __constant half *p); -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) float __ovld vloada_half(size_t offset, const half *p); float2 __ovld vloada_half2(size_t offset, const half *p); float3 __ovld vloada_half3(size_t offset, const half *p); @@ -12195,7 +12102,7 @@ float3 __ovld vloada_half3(size_t offset, const __private half *p); float4 __ovld vloada_half4(size_t offset, const __private half *p); float8 __ovld vloada_half8(size_t offset, const __private half *p); float16 __ovld vloada_half16(size_t offset, const __private half *p); -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) /** * The floatn value given by data is converted to @@ -12213,7 +12120,7 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p); * mode. The default current rounding mode is * round to nearest even. */ -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) void __ovld vstorea_half(float data, size_t offset, half *p); void __ovld vstorea_half2(float2 data, size_t offset, half *p); void __ovld vstorea_half3(float3 data, size_t offset, half *p); @@ -12498,7 +12405,7 @@ void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p); void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p); void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); #endif //cl_khr_fp64 -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions @@ -12582,7 +12489,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags); // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__opencl_c_generic_address_space) cl_mem_fence_flags __ovld get_fence(const void *ptr); cl_mem_fence_flags __ovld get_fence(void *ptr); @@ -12593,7 +12500,7 @@ cl_mem_fence_flags __ovld get_fence(void *ptr); * where gentype is builtin type or user defined type. */ -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif //defined(__opencl_c_generic_address_space) // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch @@ -13397,291 +13304,324 @@ void __ovld atomic_init(volatile atomic_double *object, double value); void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope); // atomic_fetch() +// OpenCL v2.0 s6.13.11.7.5: +// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t. +#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) int __ovld atomic_fetch_add(volatile atomic_int *object, int operand); -int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order); -int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand); -uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order); -uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand); -int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order); -int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand); -uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order); -uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); int __ovld atomic_fetch_or(volatile atomic_int *object, int operand); -int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order); -int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand); -uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order); -uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand); -int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order); -int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand); -uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order); -uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); int __ovld atomic_fetch_and(volatile atomic_int *object, int operand); -int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order); -int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand); -uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order); -uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); int __ovld atomic_fetch_min(volatile atomic_int *object, int operand); -int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order); -int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand); -uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order); -uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); int __ovld atomic_fetch_max(volatile atomic_int *object, int operand); -int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order); -int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand); -uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order); -uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) long __ovld atomic_fetch_add(volatile atomic_long *object, long operand); -long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order); -long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand); -ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); -ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand); -long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order); -long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand); -ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); -ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); long __ovld atomic_fetch_or(volatile atomic_long *object, long operand); -long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order); -long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand); -ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); -ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand); -long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order); -long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand); -ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); -ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); long __ovld atomic_fetch_and(volatile atomic_long *object, long operand); -long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order); -long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand); -ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); -ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); long __ovld atomic_fetch_min(volatile atomic_long *object, long operand); -long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order); -long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand); -ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); -ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); long __ovld atomic_fetch_max(volatile atomic_long *object, long operand); -long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order); -long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand); -ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); -ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand); +uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand); #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#endif -// OpenCL v2.0 s6.13.11.7.5: -// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t. -// or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. - +#if defined(__opencl_c_atomic_scope_device) +int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order); +uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order); +int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order); +uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order); +int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order); +uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order); +int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order); +uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order); +int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order); +uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order); +int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order); +uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order); +int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order); +uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) -uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand); +long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order); +ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order); +ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order); +ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order); +ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order); +ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order); +ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); +long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order); +ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); -uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); -uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand); uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); -uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); +#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#endif -uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand); -uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); -uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); -uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand); -uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); -uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); -uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand); -uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); -uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); -uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax); -uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); -uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); -uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax); -uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); -uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); - -intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand); -intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); -intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); -intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand); -intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); -intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); -intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand); -intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); -intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); -intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax); -intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); -intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); -intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax); -intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); -intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); +int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); +uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); +uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); +uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); +uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); +uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); +uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); +uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); +ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); +ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); +ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); +ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); +ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); +ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); +ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); +#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); +uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); #endif // atomic_store() +#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) void __ovld atomic_store(volatile atomic_int *object, int desired); -void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order); -void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); void __ovld atomic_store(volatile atomic_uint *object, uint desired); -void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order); -void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); void __ovld atomic_store(volatile atomic_float *object, float desired); -void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order); -void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); + #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 void __ovld atomic_store(volatile atomic_double *object, double desired); -void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order); -void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); #endif //cl_khr_fp64 void __ovld atomic_store(volatile atomic_long *object, long desired); -void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order); -void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); void __ovld atomic_store(volatile atomic_ulong *object, ulong desired); +#endif +#endif + +#if defined(__opencl_c_atomic_scope_device) +void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order); +void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order); +void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order); +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#ifdef cl_khr_fp64 +void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order); +#endif //cl_khr_fp64 +void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order); void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); +#endif +#endif + +void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); +void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); +void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#ifdef cl_khr_fp64 +void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); +#endif //cl_khr_fp64 +void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); #endif // atomic_load() - +#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) int __ovld atomic_load(volatile atomic_int *object); -int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order); -int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope); uint __ovld atomic_load(volatile atomic_uint *object); -uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order); -uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope); float __ovld atomic_load(volatile atomic_float *object); -float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order); -float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_load(volatile atomic_double *object); -double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order); -double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope); #endif //cl_khr_fp64 long __ovld atomic_load(volatile atomic_long *object); -long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order); -long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope); ulong __ovld atomic_load(volatile atomic_ulong *object); +#endif +#endif + +#if defined(__opencl_c_atomic_scope_device) +int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order); +uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order); +float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order); +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#ifdef cl_khr_fp64 +double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order); +#endif //cl_khr_fp64 +long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order); ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order); +#endif +#endif + +int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope); +uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope); +float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope); +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#ifdef cl_khr_fp64 +double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope); +#endif //cl_khr_fp64 +long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope); ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope); #endif // atomic_exchange() +#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) int __ovld atomic_exchange(volatile atomic_int *object, int desired); -int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order); -int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired); -uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order); -uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); float __ovld atomic_exchange(volatile atomic_float *object, float desired); -float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order); -float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 double __ovld atomic_exchange(volatile atomic_double *object, double desired); -double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order); -double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); #endif //cl_khr_fp64 long __ovld atomic_exchange(volatile atomic_long *object, long desired); -long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order); -long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired); +#endif +#endif + +#if defined(__opencl_c_atomic_scope_device) +int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order); +uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order); +float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order); +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#ifdef cl_khr_fp64 +double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order); +#endif //cl_khr_fp64 +long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order); ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); +#endif +#endif + +int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); +uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); +float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#ifdef cl_khr_fp64 +double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); +#endif //cl_khr_fp64 +long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); #endif // atomic_compare_exchange_strong() and atomic_compare_exchange_weak() - +#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired); +bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired); +bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired); +bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired); +bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired); +bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired); + +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#ifdef cl_khr_fp64 +bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired); +bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired); +#endif //cl_khr_fp64 +bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired); +bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired); +bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired); +bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired); +#endif +#endif + bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, int desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, - int desired, memory_order success, memory_order failure, memory_scope scope); -bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, uint desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, - uint desired, memory_order success, memory_order failure, memory_scope scope); -bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, int desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, - int desired, memory_order success, memory_order failure, memory_scope scope); -bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, uint desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, - uint desired, memory_order success, memory_order failure, memory_scope scope); -bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, float desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, - float desired, memory_order success, memory_order failure, memory_scope scope); -bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, float desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, - float desired, memory_order success, memory_order failure, memory_scope scope); #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #ifdef cl_khr_fp64 -bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, double desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, - double desired, memory_order success, memory_order failure, memory_scope scope); -bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, double desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, - double desired, memory_order success, memory_order failure, memory_scope scope); #endif //cl_khr_fp64 -bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, long desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, - long desired, memory_order success, memory_order failure, memory_scope scope); -bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, long desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, - long desired, memory_order success, memory_order failure, memory_scope scope); -bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired); bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, ulong desired, memory_order success, memory_order failure); -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, - ulong desired, memory_order success, memory_order failure, memory_scope scope); -bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, ulong desired, memory_order success, memory_order failure); +#endif + +bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, + int desired, memory_order success, memory_order failure, memory_scope scope); +bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, + uint desired, memory_order success, memory_order failure, memory_scope scope); +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, + int desired, memory_order success, memory_order failure, memory_scope scope); +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, + uint desired, memory_order success, memory_order failure, memory_scope scope); +bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, + float desired, memory_order success, memory_order failure, memory_scope scope); +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, + float desired, memory_order success, memory_order failure, memory_scope scope); +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#ifdef cl_khr_fp64 +bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, + double desired, memory_order success, memory_order failure, memory_scope scope); +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, + double desired, memory_order success, memory_order failure, memory_scope scope); +#endif //cl_khr_fp64 +bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, + long desired, memory_order success, memory_order failure, memory_scope scope); +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, + long desired, memory_order success, memory_order failure, memory_scope scope); +bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, + ulong desired, memory_order success, memory_order failure, memory_scope scope); bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, ulong desired, memory_order success, memory_order failure, memory_scope scope); #endif // atomic_flag_test_and_set() and atomic_flag_clear() - +#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object); -bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order); -bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); void __ovld atomic_flag_clear(volatile atomic_flag *object); +#endif + +#if defined(__opencl_c_atomic_scope_device) +bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order); void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order); -void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); +#endif +bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); +void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions @@ -14176,12 +14116,6 @@ half16 __ovld __cnfn shuffle2(half8 x, half8 y, ushort16 mask); half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask); #endif //cl_khr_fp16 -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) -// OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf - -int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); -#endif - // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions #ifdef cl_khr_gl_msaa_sharing @@ -16130,6 +16064,230 @@ double __ovld sub_group_clustered_reduce_max( double value, uint clustersize ); #endif // cl_khr_subgroup_clustered_reduce +#if defined(cl_khr_extended_bit_ops) +char __ovld __cnfn bitfield_insert(char, char, uint, uint); +uchar __ovld __cnfn bitfield_insert(uchar, uchar, uint, uint); +short __ovld __cnfn bitfield_insert(short, short, uint, uint); +ushort __ovld __cnfn bitfield_insert(ushort, ushort, uint, uint); +int __ovld __cnfn bitfield_insert(int, int, uint, uint); +uint __ovld __cnfn bitfield_insert(uint, uint, uint, uint); +long __ovld __cnfn bitfield_insert(long, long, uint, uint); +ulong __ovld __cnfn bitfield_insert(ulong, ulong, uint, uint); +char2 __ovld __cnfn bitfield_insert(char2, char2, uint, uint); +uchar2 __ovld __cnfn bitfield_insert(uchar2, uchar2, uint, uint); +short2 __ovld __cnfn bitfield_insert(short2, short2, uint, uint); +ushort2 __ovld __cnfn bitfield_insert(ushort2, ushort2, uint, uint); +int2 __ovld __cnfn bitfield_insert(int2, int2, uint, uint); +uint2 __ovld __cnfn bitfield_insert(uint2, uint2, uint, uint); +long2 __ovld __cnfn bitfield_insert(long2, long2, uint, uint); +ulong2 __ovld __cnfn bitfield_insert(ulong2, ulong2, uint, uint); +char3 __ovld __cnfn bitfield_insert(char3, char3, uint, uint); +uchar3 __ovld __cnfn bitfield_insert(uchar3, uchar3, uint, uint); +short3 __ovld __cnfn bitfield_insert(short3, short3, uint, uint); +ushort3 __ovld __cnfn bitfield_insert(ushort3, ushort3, uint, uint); +int3 __ovld __cnfn bitfield_insert(int3, int3, uint, uint); +uint3 __ovld __cnfn bitfield_insert(uint3, uint3, uint, uint); +long3 __ovld __cnfn bitfield_insert(long3, long3, uint, uint); +ulong3 __ovld __cnfn bitfield_insert(ulong3, ulong3, uint, uint); +char4 __ovld __cnfn bitfield_insert(char4, char4, uint, uint); +uchar4 __ovld __cnfn bitfield_insert(uchar4, uchar4, uint, uint); +short4 __ovld __cnfn bitfield_insert(short4, short4, uint, uint); +ushort4 __ovld __cnfn bitfield_insert(ushort4, ushort4, uint, uint); +int4 __ovld __cnfn bitfield_insert(int4, int4, uint, uint); +uint4 __ovld __cnfn bitfield_insert(uint4, uint4, uint, uint); +long4 __ovld __cnfn bitfield_insert(long4, long4, uint, uint); +ulong4 __ovld __cnfn bitfield_insert(ulong4, ulong4, uint, uint); +char8 __ovld __cnfn bitfield_insert(char8, char8, uint, uint); +uchar8 __ovld __cnfn bitfield_insert(uchar8, uchar8, uint, uint); +short8 __ovld __cnfn bitfield_insert(short8, short8, uint, uint); +ushort8 __ovld __cnfn bitfield_insert(ushort8, ushort8, uint, uint); +int8 __ovld __cnfn bitfield_insert(int8, int8, uint, uint); +uint8 __ovld __cnfn bitfield_insert(uint8, uint8, uint, uint); +long8 __ovld __cnfn bitfield_insert(long8, long8, uint, uint); +ulong8 __ovld __cnfn bitfield_insert(ulong8, ulong8, uint, uint); +char16 __ovld __cnfn bitfield_insert(char16, char16, uint, uint); +uchar16 __ovld __cnfn bitfield_insert(uchar16, uchar16, uint, uint); +short16 __ovld __cnfn bitfield_insert(short16, short16, uint, uint); +ushort16 __ovld __cnfn bitfield_insert(ushort16, ushort16, uint, uint); +int16 __ovld __cnfn bitfield_insert(int16, int16, uint, uint); +uint16 __ovld __cnfn bitfield_insert(uint16, uint16, uint, uint); +long16 __ovld __cnfn bitfield_insert(long16, long16, uint, uint); +ulong16 __ovld __cnfn bitfield_insert(ulong16, ulong16, uint, uint); + +char __ovld __cnfn bitfield_extract_signed(char, uint, uint); +short __ovld __cnfn bitfield_extract_signed(short, uint, uint); +int __ovld __cnfn bitfield_extract_signed(int, uint, uint); +long __ovld __cnfn bitfield_extract_signed(long, uint, uint); +char2 __ovld __cnfn bitfield_extract_signed(char2, uint, uint); +short2 __ovld __cnfn bitfield_extract_signed(short2, uint, uint); +int2 __ovld __cnfn bitfield_extract_signed(int2, uint, uint); +long2 __ovld __cnfn bitfield_extract_signed(long2, uint, uint); +char3 __ovld __cnfn bitfield_extract_signed(char3, uint, uint); +short3 __ovld __cnfn bitfield_extract_signed(short3, uint, uint); +int3 __ovld __cnfn bitfield_extract_signed(int3, uint, uint); +long3 __ovld __cnfn bitfield_extract_signed(long3, uint, uint); +char4 __ovld __cnfn bitfield_extract_signed(char4, uint, uint); +short4 __ovld __cnfn bitfield_extract_signed(short4, uint, uint); +int4 __ovld __cnfn bitfield_extract_signed(int4, uint, uint); +long4 __ovld __cnfn bitfield_extract_signed(long4, uint, uint); +char8 __ovld __cnfn bitfield_extract_signed(char8, uint, uint); +short8 __ovld __cnfn bitfield_extract_signed(short8, uint, uint); +int8 __ovld __cnfn bitfield_extract_signed(int8, uint, uint); +long8 __ovld __cnfn bitfield_extract_signed(long8, uint, uint); +char16 __ovld __cnfn bitfield_extract_signed(char16, uint, uint); +short16 __ovld __cnfn bitfield_extract_signed(short16, uint, uint); +int16 __ovld __cnfn bitfield_extract_signed(int16, uint, uint); +long16 __ovld __cnfn bitfield_extract_signed(long16, uint, uint); + +char __ovld __cnfn bitfield_extract_signed(uchar, uint, uint); +short __ovld __cnfn bitfield_extract_signed(ushort, uint, uint); +int __ovld __cnfn bitfield_extract_signed(uint, uint, uint); +long __ovld __cnfn bitfield_extract_signed(ulong, uint, uint); +char2 __ovld __cnfn bitfield_extract_signed(uchar2, uint, uint); +short2 __ovld __cnfn bitfield_extract_signed(ushort2, uint, uint); +int2 __ovld __cnfn bitfield_extract_signed(uint2, uint, uint); +long2 __ovld __cnfn bitfield_extract_signed(ulong2, uint, uint); +char3 __ovld __cnfn bitfield_extract_signed(uchar3, uint, uint); +short3 __ovld __cnfn bitfield_extract_signed(ushort3, uint, uint); +int3 __ovld __cnfn bitfield_extract_signed(uint3, uint, uint); +long3 __ovld __cnfn bitfield_extract_signed(ulong3, uint, uint); +char4 __ovld __cnfn bitfield_extract_signed(uchar4, uint, uint); +short4 __ovld __cnfn bitfield_extract_signed(ushort4, uint, uint); +int4 __ovld __cnfn bitfield_extract_signed(uint4, uint, uint); +long4 __ovld __cnfn bitfield_extract_signed(ulong4, uint, uint); +char8 __ovld __cnfn bitfield_extract_signed(uchar8, uint, uint); +short8 __ovld __cnfn bitfield_extract_signed(ushort8, uint, uint); +int8 __ovld __cnfn bitfield_extract_signed(uint8, uint, uint); +long8 __ovld __cnfn bitfield_extract_signed(ulong8, uint, uint); +char16 __ovld __cnfn bitfield_extract_signed(uchar16, uint, uint); +short16 __ovld __cnfn bitfield_extract_signed(ushort16, uint, uint); +int16 __ovld __cnfn bitfield_extract_signed(uint16, uint, uint); +long16 __ovld __cnfn bitfield_extract_signed(ulong16, uint, uint); + +uchar __ovld __cnfn bitfield_extract_unsigned(char, uint, uint); +ushort __ovld __cnfn bitfield_extract_unsigned(short, uint, uint); +uint __ovld __cnfn bitfield_extract_unsigned(int, uint, uint); +ulong __ovld __cnfn bitfield_extract_unsigned(long, uint, uint); +uchar2 __ovld __cnfn bitfield_extract_unsigned(char2, uint, uint); +ushort2 __ovld __cnfn bitfield_extract_unsigned(short2, uint, uint); +uint2 __ovld __cnfn bitfield_extract_unsigned(int2, uint, uint); +ulong2 __ovld __cnfn bitfield_extract_unsigned(long2, uint, uint); +uchar3 __ovld __cnfn bitfield_extract_unsigned(char3, uint, uint); +ushort3 __ovld __cnfn bitfield_extract_unsigned(short3, uint, uint); +uint3 __ovld __cnfn bitfield_extract_unsigned(int3, uint, uint); +ulong3 __ovld __cnfn bitfield_extract_unsigned(long3, uint, uint); +uchar4 __ovld __cnfn bitfield_extract_unsigned(char4, uint, uint); +ushort4 __ovld __cnfn bitfield_extract_unsigned(short4, uint, uint); +uint4 __ovld __cnfn bitfield_extract_unsigned(int4, uint, uint); +ulong4 __ovld __cnfn bitfield_extract_unsigned(long4, uint, uint); +uchar8 __ovld __cnfn bitfield_extract_unsigned(char8, uint, uint); +ushort8 __ovld __cnfn bitfield_extract_unsigned(short8, uint, uint); +uint8 __ovld __cnfn bitfield_extract_unsigned(int8, uint, uint); +ulong8 __ovld __cnfn bitfield_extract_unsigned(long8, uint, uint); +uchar16 __ovld __cnfn bitfield_extract_unsigned(char16, uint, uint); +ushort16 __ovld __cnfn bitfield_extract_unsigned(short16, uint, uint); +uint16 __ovld __cnfn bitfield_extract_unsigned(int16, uint, uint); +ulong16 __ovld __cnfn bitfield_extract_unsigned(long16, uint, uint); + +uchar __ovld __cnfn bitfield_extract_unsigned(uchar, uint, uint); +ushort __ovld __cnfn bitfield_extract_unsigned(ushort, uint, uint); +uint __ovld __cnfn bitfield_extract_unsigned(uint, uint, uint); +ulong __ovld __cnfn bitfield_extract_unsigned(ulong, uint, uint); +uchar2 __ovld __cnfn bitfield_extract_unsigned(uchar2, uint, uint); +ushort2 __ovld __cnfn bitfield_extract_unsigned(ushort2, uint, uint); +uint2 __ovld __cnfn bitfield_extract_unsigned(uint2, uint, uint); +ulong2 __ovld __cnfn bitfield_extract_unsigned(ulong2, uint, uint); +uchar3 __ovld __cnfn bitfield_extract_unsigned(uchar3, uint, uint); +ushort3 __ovld __cnfn bitfield_extract_unsigned(ushort3, uint, uint); +uint3 __ovld __cnfn bitfield_extract_unsigned(uint3, uint, uint); +ulong3 __ovld __cnfn bitfield_extract_unsigned(ulong3, uint, uint); +uchar4 __ovld __cnfn bitfield_extract_unsigned(uchar4, uint, uint); +ushort4 __ovld __cnfn bitfield_extract_unsigned(ushort4, uint, uint); +uint4 __ovld __cnfn bitfield_extract_unsigned(uint4, uint, uint); +ulong4 __ovld __cnfn bitfield_extract_unsigned(ulong4, uint, uint); +uchar8 __ovld __cnfn bitfield_extract_unsigned(uchar8, uint, uint); +ushort8 __ovld __cnfn bitfield_extract_unsigned(ushort8, uint, uint); +uint8 __ovld __cnfn bitfield_extract_unsigned(uint8, uint, uint); +ulong8 __ovld __cnfn bitfield_extract_unsigned(ulong8, uint, uint); +uchar16 __ovld __cnfn bitfield_extract_unsigned(uchar16, uint, uint); +ushort16 __ovld __cnfn bitfield_extract_unsigned(ushort16, uint, uint); +uint16 __ovld __cnfn bitfield_extract_unsigned(uint16, uint, uint); +ulong16 __ovld __cnfn bitfield_extract_unsigned(ulong16, uint, uint); + +char __ovld __cnfn bit_reverse(char); +uchar __ovld __cnfn bit_reverse(uchar); +short __ovld __cnfn bit_reverse(short); +ushort __ovld __cnfn bit_reverse(ushort); +int __ovld __cnfn bit_reverse(int); +uint __ovld __cnfn bit_reverse(uint); +long __ovld __cnfn bit_reverse(long); +ulong __ovld __cnfn bit_reverse(ulong); +char2 __ovld __cnfn bit_reverse(char2); +uchar2 __ovld __cnfn bit_reverse(uchar2); +short2 __ovld __cnfn bit_reverse(short2); +ushort2 __ovld __cnfn bit_reverse(ushort2); +int2 __ovld __cnfn bit_reverse(int2); +uint2 __ovld __cnfn bit_reverse(uint2); +long2 __ovld __cnfn bit_reverse(long2); +ulong2 __ovld __cnfn bit_reverse(ulong2); +char3 __ovld __cnfn bit_reverse(char3); +uchar3 __ovld __cnfn bit_reverse(uchar3); +short3 __ovld __cnfn bit_reverse(short3); +ushort3 __ovld __cnfn bit_reverse(ushort3); +int3 __ovld __cnfn bit_reverse(int3); +uint3 __ovld __cnfn bit_reverse(uint3); +long3 __ovld __cnfn bit_reverse(long3); +ulong3 __ovld __cnfn bit_reverse(ulong3); +char4 __ovld __cnfn bit_reverse(char4); +uchar4 __ovld __cnfn bit_reverse(uchar4); +short4 __ovld __cnfn bit_reverse(short4); +ushort4 __ovld __cnfn bit_reverse(ushort4); +int4 __ovld __cnfn bit_reverse(int4); +uint4 __ovld __cnfn bit_reverse(uint4); +long4 __ovld __cnfn bit_reverse(long4); +ulong4 __ovld __cnfn bit_reverse(ulong4); +char8 __ovld __cnfn bit_reverse(char8); +uchar8 __ovld __cnfn bit_reverse(uchar8); +short8 __ovld __cnfn bit_reverse(short8); +ushort8 __ovld __cnfn bit_reverse(ushort8); +int8 __ovld __cnfn bit_reverse(int8); +uint8 __ovld __cnfn bit_reverse(uint8); +long8 __ovld __cnfn bit_reverse(long8); +ulong8 __ovld __cnfn bit_reverse(ulong8); +char16 __ovld __cnfn bit_reverse(char16); +uchar16 __ovld __cnfn bit_reverse(uchar16); +short16 __ovld __cnfn bit_reverse(short16); +ushort16 __ovld __cnfn bit_reverse(ushort16); +int16 __ovld __cnfn bit_reverse(int16); +uint16 __ovld __cnfn bit_reverse(uint16); +long16 __ovld __cnfn bit_reverse(long16); +ulong16 __ovld __cnfn bit_reverse(ulong16); +#endif // cl_khr_extended_bit_ops + +#if defined(__opencl_c_integer_dot_product_input_4x8bit) +uint __ovld __cnfn dot(uchar4, uchar4); +int __ovld __cnfn dot(char4, char4); +int __ovld __cnfn dot(uchar4, char4); +int __ovld __cnfn dot(char4, uchar4); + +uint __ovld __cnfn dot_acc_sat(uchar4, uchar4, uint); +int __ovld __cnfn dot_acc_sat(char4, char4, int); +int __ovld __cnfn dot_acc_sat(uchar4, char4, int); +int __ovld __cnfn dot_acc_sat(char4, uchar4, int); +#endif // __opencl_c_integer_dot_product_input_4x8bit + +#if defined(__opencl_c_integer_dot_product_input_4x8bit_packed) +uint __ovld __cnfn dot_4x8packed_uu_uint(uint, uint); +int __ovld __cnfn dot_4x8packed_ss_int(uint, uint); +int __ovld __cnfn dot_4x8packed_us_int(uint, uint); +int __ovld __cnfn dot_4x8packed_su_int(uint, uint); + +uint __ovld __cnfn dot_acc_sat_4x8packed_uu_uint(uint, uint, uint); +int __ovld __cnfn dot_acc_sat_4x8packed_ss_int(uint, uint, int); +int __ovld __cnfn dot_acc_sat_4x8packed_us_int(uint, uint, int); +int __ovld __cnfn dot_acc_sat_4x8packed_su_int(uint, uint, int); +#endif // __opencl_c_integer_dot_product_input_4x8bit_packed + #if defined(cl_intel_subgroups) // Intel-Specific Sub Group Functions float __ovld __conv intel_sub_group_shuffle( float x, uint c ); @@ -17127,31 +17285,23 @@ uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2); #endif // cl_amd_media_ops2 #if defined(cl_arm_integer_dot_product_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : begin uint __ovld arm_dot(uchar4 a, uchar4 b); int __ovld arm_dot(char4 a, char4 b); -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : end #endif // defined(cl_arm_integer_dot_product_int8) #if defined(cl_arm_integer_dot_product_accumulate_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : begin uint __ovld arm_dot_acc(uchar4 a, uchar4 b, uint c); int __ovld arm_dot_acc(char4 a, char4 b, int c); -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : end #endif // defined(cl_arm_integer_dot_product_accumulate_int8) #if defined(cl_arm_integer_dot_product_accumulate_int16) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : begin uint __ovld arm_dot_acc(ushort2 a, ushort2 b, uint c); int __ovld arm_dot_acc(short2 a, short2 b, int c); -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : end #endif // defined(cl_arm_integer_dot_product_accumulate_int16) #if defined(cl_arm_integer_dot_product_accumulate_saturate_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : begin uint __ovld arm_dot_acc_sat(uchar4 a, uchar4 b, uint c); int __ovld arm_dot_acc_sat(char4 a, char4 b, int c); -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : end #endif // defined(cl_arm_integer_dot_product_accumulate_saturate_int8) // Disable any extensions we may have enabled previously. |
