relational: Implement signbit

v2 Changes:
   - use __builtin_signbit instead of shifting by hand
   - significantly improve vector shuffling
   - Works correctly now for signbit(float16) on radeonsi

Signed-off-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
llvm-svn: 211696
This commit is contained in:
Aaron Watry 2014-06-25 13:29:23 +00:00
parent bfa644b91d
commit d9ee196eab
4 changed files with 107 additions and 0 deletions

View File

@ -112,6 +112,7 @@
#include <clc/relational/isequal.h>
#include <clc/relational/isnan.h>
#include <clc/relational/select.h>
#include <clc/relational/signbit.h>
/* 6.11.8 Synchronization Functions */
#include <clc/synchronization/cl_mem_fence_flags.h>

View File

@ -0,0 +1,18 @@
/*
 * Declarations of the signbit() overloads.
 *
 * _CLC_SIGNBIT_DECL(TYPE, RETTYPE) declares one overload taking TYPE and
 * returning RETTYPE. _CLC_VECTOR_SIGNBIT_DECL(TYPE, RETTYPE) declares the
 * 2-, 3-, 4-, 8- and 16-component overloads by pasting the width onto both
 * type names.
 *
 * Both helpers are removed again at the end so that they do not stay visible
 * to code that includes this header.
 */
#define _CLC_SIGNBIT_DECL(TYPE, RETTYPE) \
  _CLC_OVERLOAD _CLC_DECL RETTYPE signbit(TYPE x);

#define _CLC_VECTOR_SIGNBIT_DECL(TYPE, RETTYPE) \
  _CLC_SIGNBIT_DECL(TYPE##2, RETTYPE##2) \
  _CLC_SIGNBIT_DECL(TYPE##3, RETTYPE##3) \
  _CLC_SIGNBIT_DECL(TYPE##4, RETTYPE##4) \
  _CLC_SIGNBIT_DECL(TYPE##8, RETTYPE##8) \
  _CLC_SIGNBIT_DECL(TYPE##16, RETTYPE##16)

/* float: scalar and vector overloads all return int / intN. */
_CLC_SIGNBIT_DECL(float, int)
_CLC_VECTOR_SIGNBIT_DECL(float, int)

#ifdef cl_khr_fp64
/* double: the scalar overload returns int, the vector overloads return longN. */
_CLC_SIGNBIT_DECL(double, int)
_CLC_VECTOR_SIGNBIT_DECL(double, long)
#endif

#undef _CLC_VECTOR_SIGNBIT_DECL
#undef _CLC_SIGNBIT_DECL

View File

@ -42,6 +42,7 @@ relational/all.cl
relational/any.cl
relational/isequal.cl
relational/isnan.cl
relational/signbit.cl
shared/clamp.cl
shared/max.cl
shared/min.cl

View File

@ -0,0 +1,87 @@
#include <clc/clc.h>
// Defines the scalar overload `RET_TYPE FUNCTION(ARG_TYPE x)` by forwarding x
// to BUILTIN_NAME and returning its result unchanged.
// The closing brace carries no trailing backslash, so the macro definition
// ends there and cannot absorb whatever line follows it.
#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, ARG_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
  return BUILTIN_NAME(x); \
}
// Defines a vector overload of FUNCTION in terms of the overloads for the two
// halves of the argument: FUNCTION(x.lo) and FUNCTION(x.hi) are packed into a
// RET_TYPE literal, which is then compared against zero to form the result.
// NOTE(review): the body is the same as _CLC_DEFINE_RELATIONAL_UNARY_VEC2 and
// nothing in the visible part of this file expands this macro.
// The closing brace carries no trailing backslash, so the macro definition
// ends there and cannot absorb whatever line follows it.
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC(RET_TYPE, FUNCTION, ARG_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
  return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != (RET_TYPE)0); \
}
// Defines the 2-component overload `RET_TYPE FUNCTION(ARG_TYPE x)`.
// FUNCTION is applied to x.lo and x.hi separately, the two results are packed
// into a RET_TYPE vector literal, and that vector is compared against zero to
// form the returned value.
// The closing brace carries no trailing backslash, so the macro definition
// ends there and cannot absorb whatever line follows it.
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
  return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != (RET_TYPE)0); \
}
// Defines the 3-component overload `RET_TYPE FUNCTION(ARG_TYPE x)`.
// FUNCTION is applied to each component of x, the per-component results are
// packed into a RET_TYPE vector literal, and that vector is compared against
// zero to form the returned value.
// The components must be grouped with braces, as in the 2-component macro:
// a parenthesized `(a, b, c)` in this position is a comma expression that
// keeps only its last operand, so every lane would reflect x.s2 alone.
// The closing brace carries no trailing backslash, so the macro definition
// ends there and cannot absorb whatever line follows it.
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
  return (RET_TYPE)( \
    (RET_TYPE){ \
      FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2) \
    } != (RET_TYPE)0); \
}
// Defines the 4-component overload `RET_TYPE FUNCTION(ARG_TYPE x)`.
// FUNCTION is applied to each component of x, the per-component results are
// packed into a RET_TYPE vector literal, and that vector is compared against
// zero to form the returned value.
// The components must be grouped with braces, as in the 2-component macro:
// a parenthesized `(a, b, c, d)` in this position is a comma expression that
// keeps only its last operand, so every lane would reflect x.s3 alone.
// The closing brace carries no trailing backslash, so the macro definition
// ends there and cannot absorb whatever line follows it.
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
  return (RET_TYPE)( \
    (RET_TYPE){ \
      FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3) \
    } != (RET_TYPE)0); \
}
// Defines the 8-component overload `RET_TYPE FUNCTION(ARG_TYPE x)`.
// FUNCTION is applied to each component of x, the per-component results are
// packed into a RET_TYPE vector literal, and that vector is compared against
// zero to form the returned value.
// The components must be grouped with braces, as in the 2-component macro:
// a parenthesized list in this position is a comma expression that keeps only
// its last operand, so every lane would reflect x.s7 alone.
// The closing brace carries no trailing backslash, so the macro definition
// ends there and cannot absorb whatever line follows it.
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
  return (RET_TYPE)( \
    (RET_TYPE){ \
      FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
      FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7) \
    } != (RET_TYPE)0); \
}
// Defines the 16-component overload `RET_TYPE FUNCTION(ARG_TYPE x)`.
// FUNCTION is applied to each component of x, the per-component results are
// packed into a RET_TYPE vector literal, and that vector is compared against
// zero to form the returned value.
// The components must be grouped with braces, as in the 2-component macro:
// a parenthesized list in this position is a comma expression that keeps only
// its last operand, so every lane would reflect x.sf alone.
// The closing brace carries no trailing backslash, so the macro definition
// ends there and cannot absorb whatever line follows it.
#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
  return (RET_TYPE)( \
    (RET_TYPE){ \
      FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
      FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7), \
      FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \
      FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf) \
    } != (RET_TYPE)0); \
}
// Instantiates FUNCTION for ARG_TYPE at every width in one go: the scalar
// overload (implemented with BUILTIN_FUNCTION) followed by the 2-, 3-, 4-, 8-
// and 16-component overloads, whose type names are formed by pasting the
// width onto RET_TYPE and ARG_TYPE.
// The last line carries no trailing backslash; with one, the invocation that
// follows this definition would be absorbed into the macro body instead of
// being expanded.
#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
_CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
_CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \
_CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \
_CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \
_CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \
_CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16)
_CLC_DEFINE_RELATIONAL_UNARY(int, signbit, __builtin_signbitf, float) // float overloads: scalar via __builtin_signbitf, plus 2/3/4/8/16-component vectors returning intN
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// The double overloads are spelled out one by one rather than generated with
// _CLC_DEFINE_RELATIONAL_UNARY, because their return types are not uniform:
// signbit(double) yields int, whereas signbit(doubleN) yields longN.
_CLC_DEF _CLC_OVERLOAD int signbit(double x) {
  const int sign = __builtin_signbit(x);
  return sign;
}

// Vector overloads, one instantiation per supported width.
_CLC_DEFINE_RELATIONAL_UNARY_VEC2(long2, signbit, double2)
_CLC_DEFINE_RELATIONAL_UNARY_VEC3(long3, signbit, double3)
_CLC_DEFINE_RELATIONAL_UNARY_VEC4(long4, signbit, double4)
_CLC_DEFINE_RELATIONAL_UNARY_VEC8(long8, signbit, double8)
_CLC_DEFINE_RELATIONAL_UNARY_VEC16(long16, signbit, double16)

#endif
// Remove every helper macro this file defined so none of them stays visible
// past this point. _CLC_DEFINE_RELATIONAL_UNARY_VEC is included as well: it
// is defined above alongside the width-specific variants.
#undef _CLC_DEFINE_RELATIONAL_UNARY
#undef _CLC_DEFINE_RELATIONAL_UNARY_SCALAR
#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC
#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC2
#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC3
#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC4
#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC8
#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC16