I've tried playing with Clang's extended vectors. The ternary operator is supposed to work on them, but it is not working for me. Example:
int main()
{
using int4 =
You can loop over the elements directly in Clang. Here is a solution for GCC and Clang.
#include
#include
// Four-lane SIMD vector aliases for float and int.
// Clang gets its "extended vector" flavour (ext_vector_type); every other
// compiler falls back to the GCC-style vector_size attribute, whose argument
// is the total vector size in bytes rather than the lane count.
#if defined(__clang__)
using float4 = float __attribute__ ((ext_vector_type(4)));
using int4 = int __attribute__ ((ext_vector_type(4)));
#else
using float4 = float __attribute__ ((vector_size (sizeof(float)*4)));
using int4 = int __attribute__ ((vector_size (sizeof(int)*4)));
#endif
/// Lane-wise select: for each of the four lanes, yields a[i] when s[i] is
/// non-zero and b[i] otherwise.
///
/// @param s  per-lane selector; a lane counts as "true" when it is non-zero
/// @param a  values taken where the selector lane is non-zero
/// @param b  values taken where the selector lane is zero
/// @return   the blended four-lane vector
float4 select(int4 s, float4 a, float4 b) {
#if defined(__clang__) || defined(__INTEL_COMPILER) || !defined(__GNUC__)
    // No vector ternary is relied on here: blend one lane at a time.
    float4 blended;
    for (int lane = 0; lane < 4; ++lane) {
        if (s[lane]) {
            blended[lane] = a[lane];
        } else {
            blended[lane] = b[lane];
        }
    }
    return blended;
#else
    // Plain GCC (C++ mode) applies ?: element-wise to vector operands.
    return s ? a : b;
#endif
}
They both generate
select(int __vector(4), float __vector(4), float __vector(4)):
pxor xmm3, xmm3
pcmpeqd xmm0, xmm3
blendvps xmm1, xmm2, xmm0
movaps xmm0, xmm1
ret
But with AVX512 it's better to use masks (e.g. `__mmask16`).