[CUDA] Improve handling of math functions.
Summary: A bunch of related changes here to our CUDA math headers. - The second arg to nexttoward is a double (well, technically, long double, but we don't have that), not a float. - Add a forward-declare of llround(float), which is defined in the CUDA headers. We need this for the same reason we need most of the other forward-declares: To prevent a constexpr function in our standard library from becoming host+device. - Add nexttowardf implementation. - Pull "foobarf" functions defined by the CUDA headers in the global namespace into namespace std. This lets you do e.g. std::sinf. - Add overloads for math functions accepting integer types. This lets you do e.g. std::sin(0) without having an ambiguity between the overload that takes a float and the one that takes a double. With these changes, we pass testcases derived from libc++ for cmath and math.h. We can check these testcases in to the test-suite once support for CUDA lands there. Reviewers: tra Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D23627 llvm-svn: 279140
This commit is contained in:
parent
40e8ca46ad
commit
cb20a09f54
|
@ -26,13 +26,15 @@
|
|||
#error "This file is for CUDA compilation only."
|
||||
#endif
|
||||
|
||||
#include <limits>
|
||||
|
||||
// CUDA lets us use various std math functions on the device side. This file
|
||||
// works in concert with __clang_cuda_math_forward_declares.h to make this work.
|
||||
//
|
||||
// Specifically, the forward-declares header declares __device__ overloads for
|
||||
// these functions in the global namespace, then pulls them into namespace std
|
||||
// with 'using' statements. Then this file implements those functions, after
|
||||
// the implementations have been pulled in.
|
||||
// their implementations have been pulled in.
|
||||
//
|
||||
// It's important that we declare the functions in the global namespace and pull
|
||||
// them into namespace std with using statements, as opposed to simply declaring
|
||||
|
@ -120,12 +122,15 @@ __DEVICE__ float ldexp(float __arg, int __exp) {
|
|||
__DEVICE__ float log(float __x) { return ::logf(__x); }
|
||||
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
|
||||
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
|
||||
__DEVICE__ float nexttoward(float __from, float __to) {
|
||||
__DEVICE__ float nexttoward(float __from, double __to) {
|
||||
return __builtin_nexttowardf(__from, __to);
|
||||
}
|
||||
__DEVICE__ double nexttoward(double __from, double __to) {
|
||||
return __builtin_nexttoward(__from, __to);
|
||||
}
|
||||
__DEVICE__ float nexttowardf(float __from, double __to) {
|
||||
return __builtin_nexttowardf(__from, __to);
|
||||
}
|
||||
__DEVICE__ float pow(float __base, float __exp) {
|
||||
return ::powf(__base, __exp);
|
||||
}
|
||||
|
@ -143,6 +148,280 @@ __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
|
|||
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
|
||||
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
|
||||
|
||||
// Now we've defined everything we promised we'd define in
|
||||
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
|
||||
// fix up our math functions.
|
||||
//
|
||||
// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
|
||||
// only sin(float) and sin(double), which means that e.g. sin(0) is
|
||||
// ambiguous.
|
||||
//
|
||||
// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
|
||||
// std. These are defined in the CUDA headers in the global namespace,
|
||||
// independent of everything else we've done here.
|
||||
|
||||
// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
|
||||
// we go ahead and unconditionally define functions that are only available when
|
||||
// compiling for C++11 to match the behavior of the CUDA headers.
|
||||
template<bool __B, class __T = void>
|
||||
struct __clang_cuda_enable_if {};
|
||||
|
||||
template <class __T> struct __clang_cuda_enable_if<true, __T> {
|
||||
typedef __T type;
|
||||
};
|
||||
|
||||
// Defines an overload of __fn that accepts one integral argument, calls
|
||||
// __fn((double)x), and returns __retty.
|
||||
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \
|
||||
template <typename __T> \
|
||||
__DEVICE__ \
|
||||
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \
|
||||
__retty>::type \
|
||||
__fn(__T __x) { \
|
||||
return ::__fn((double)__x); \
|
||||
}
|
||||
|
||||
// Defines an overload of __fn that accepts one two arithmetic arguments, calls
|
||||
// __fn((double)x, (double)y), and returns a double.
|
||||
//
|
||||
// Note this is different from OVERLOAD_1, which generates an overload that
|
||||
// accepts only *integral* arguments.
|
||||
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \
|
||||
template <typename __T1, typename __T2> \
|
||||
__DEVICE__ typename __clang_cuda_enable_if< \
|
||||
std::numeric_limits<__T1>::is_specialized && \
|
||||
std::numeric_limits<__T2>::is_specialized, \
|
||||
__retty>::type \
|
||||
__fn(__T1 __x, __T2 __y) { \
|
||||
return __fn((double)__x, (double)__y); \
|
||||
}
|
||||
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
|
||||
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
|
||||
|
||||
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
|
||||
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
|
||||
|
||||
// Overloads for functions that don't match the patterns expected by
|
||||
// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
|
||||
template <typename __T1, typename __T2, typename __T3>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<
|
||||
std::numeric_limits<__T1>::is_specialized &&
|
||||
std::numeric_limits<__T2>::is_specialized &&
|
||||
std::numeric_limits<__T3>::is_specialized,
|
||||
double>::type
|
||||
fma(__T1 __x, __T2 __y, __T3 __z) {
|
||||
return std::fma((double)__x, (double)__y, (double)__z);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
||||
double>::type
|
||||
frexp(__T __x, int *__exp) {
|
||||
return std::frexp((double)__x, __exp);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
||||
double>::type
|
||||
ldexp(__T __x, int __exp) {
|
||||
return std::ldexp((double)__x, __exp);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
||||
double>::type
|
||||
nexttoward(__T __from, double __to) {
|
||||
return std::nexttoward((double)__from, __to);
|
||||
}
|
||||
|
||||
template <typename __T1, typename __T2>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<
|
||||
std::numeric_limits<__T1>::is_specialized &&
|
||||
std::numeric_limits<__T2>::is_specialized,
|
||||
double>::type
|
||||
remquo(__T1 __x, __T2 __y, int *__quo) {
|
||||
return std::remquo((double)__x, (double)__y, __quo);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
||||
double>::type
|
||||
scalbln(__T __x, long __exp) {
|
||||
return std::scalbln((double)__x, __exp);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
||||
double>::type
|
||||
scalbn(__T __x, int __exp) {
|
||||
return std::scalbn((double)__x, __exp);
|
||||
}
|
||||
|
||||
namespace std {
|
||||
// Pull the new overloads we defined above into namespace std.
|
||||
using ::acos;
|
||||
using ::acosh;
|
||||
using ::asin;
|
||||
using ::asinh;
|
||||
using ::atan;
|
||||
using ::atan2;
|
||||
using ::atanh;
|
||||
using ::cbrt;
|
||||
using ::ceil;
|
||||
using ::cos;
|
||||
using ::cosh;
|
||||
using ::erf;
|
||||
using ::erfc;
|
||||
using ::exp;
|
||||
using ::exp2;
|
||||
using ::expm1;
|
||||
using ::fabs;
|
||||
using ::floor;
|
||||
using ::frexp;
|
||||
using ::ilogb;
|
||||
using ::ldexp;
|
||||
using ::lgamma;
|
||||
using ::llrint;
|
||||
using ::llround;
|
||||
using ::log;
|
||||
using ::log10;
|
||||
using ::log1p;
|
||||
using ::log2;
|
||||
using ::logb;
|
||||
using ::lrint;
|
||||
using ::lround;
|
||||
using ::nexttoward;
|
||||
using ::pow;
|
||||
using ::remquo;
|
||||
using ::scalbln;
|
||||
using ::scalbn;
|
||||
using ::sin;
|
||||
using ::sinh;
|
||||
using ::sqrt;
|
||||
using ::tan;
|
||||
using ::tanh;
|
||||
using ::tgamma;
|
||||
|
||||
// Finally, pull the "foobarf" functions that CUDA defines in its headers into
|
||||
// namespace std.
|
||||
using ::acosf;
|
||||
using ::acoshf;
|
||||
using ::asinf;
|
||||
using ::asinhf;
|
||||
using ::atan2f;
|
||||
using ::atanf;
|
||||
using ::atanhf;
|
||||
using ::cbrtf;
|
||||
using ::ceilf;
|
||||
using ::copysignf;
|
||||
using ::cosf;
|
||||
using ::coshf;
|
||||
using ::erfcf;
|
||||
using ::erff;
|
||||
using ::exp2f;
|
||||
using ::expf;
|
||||
using ::expm1f;
|
||||
using ::fabsf;
|
||||
using ::fdimf;
|
||||
using ::floorf;
|
||||
using ::fmaf;
|
||||
using ::fmaxf;
|
||||
using ::fminf;
|
||||
using ::fmodf;
|
||||
using ::frexpf;
|
||||
using ::hypotf;
|
||||
using ::ilogbf;
|
||||
using ::ldexpf;
|
||||
using ::lgammaf;
|
||||
using ::llrintf;
|
||||
using ::llroundf;
|
||||
using ::log10f;
|
||||
using ::log1pf;
|
||||
using ::log2f;
|
||||
using ::logbf;
|
||||
using ::logf;
|
||||
using ::lrintf;
|
||||
using ::lroundf;
|
||||
using ::modff;
|
||||
using ::nearbyintf;
|
||||
using ::nextafterf;
|
||||
using ::nexttowardf;
|
||||
using ::nexttowardf;
|
||||
using ::powf;
|
||||
using ::remainderf;
|
||||
using ::remquof;
|
||||
using ::rintf;
|
||||
using ::roundf;
|
||||
using ::scalblnf;
|
||||
using ::scalbnf;
|
||||
using ::sinf;
|
||||
using ::sinhf;
|
||||
using ::sqrtf;
|
||||
using ::tanf;
|
||||
using ::tanhf;
|
||||
using ::tgammaf;
|
||||
using ::truncf;
|
||||
}
|
||||
|
||||
#undef __DEVICE__
|
||||
|
||||
#endif
|
||||
|
|
|
@ -140,6 +140,7 @@ __DEVICE__ long lrint(double);
|
|||
__DEVICE__ long lrint(float);
|
||||
__DEVICE__ long lround(double);
|
||||
__DEVICE__ long lround(float);
|
||||
__DEVICE__ long long llround(float); // No llround(double).
|
||||
__DEVICE__ double modf(double, double *);
|
||||
__DEVICE__ float modf(float, float *);
|
||||
__DEVICE__ double nan(const char *);
|
||||
|
@ -149,7 +150,8 @@ __DEVICE__ float nearbyint(float);
|
|||
__DEVICE__ double nextafter(double, double);
|
||||
__DEVICE__ float nextafter(float, float);
|
||||
__DEVICE__ double nexttoward(double, double);
|
||||
__DEVICE__ float nexttoward(float, float);
|
||||
__DEVICE__ float nexttoward(float, double);
|
||||
__DEVICE__ float nexttowardf(float, double);
|
||||
__DEVICE__ double pow(double, double);
|
||||
__DEVICE__ double pow(double, int);
|
||||
__DEVICE__ float pow(float, float);
|
||||
|
@ -235,6 +237,7 @@ using ::log2;
|
|||
using ::logb;
|
||||
using ::lrint;
|
||||
using ::lround;
|
||||
using ::llround;
|
||||
using ::modf;
|
||||
using ::nan;
|
||||
using ::nanf;
|
||||
|
|
Loading…
Reference in New Issue