DPC++ Runtime
Runtime libraries for oneAPI DPC++
math_functions.cpp
Go to the documentation of this file.
1 //==------------------- math_functions.cpp ---------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Define _USE_MATH_DEFINES to enforce math defines of macros like M_PI in
10 // <cmath>. _USE_MATH_DEFINES is defined here before includes of SYCL header
11 // files to avoid include of <cmath> via those SYCL headers with unset
12 // _USE_MATH_DEFINES.
13 #define _USE_MATH_DEFINES
14 
15 #include <cmath>
16 
18 
19 #include "host_helper_macros.hpp"
20 
21 namespace sycl {
22 inline namespace _V1 {
23 #define BUILTIN_GENF_CUSTOM(NUM_ARGS, NAME, IMPL) \
24  HOST_IMPL(NAME, IMPL) \
25  EXPORT_SCALAR_AND_VEC_1_16(NUM_ARGS, NAME, FP_TYPES)
26 
27 // NOTE: "-> decltype(x)" here and below is needed for the half version: the
28 // implementation relies on implicit conversion to float and computes the
29 // result in float precision, so the explicit return type converts it back.
30 #define BUILTIN_GENF(NUM_ARGS, NAME) \
31  BUILTIN_GENF_CUSTOM(NUM_ARGS, NAME, [](NUM_ARGS##_AUTO_ARG) -> decltype(x) { \
32  return std::NAME(NUM_ARGS##_ARG); \
33  })
34 
35 BUILTIN_GENF(ONE_ARG, acos)
36 BUILTIN_GENF(ONE_ARG, acosh)
37 BUILTIN_GENF_CUSTOM(ONE_ARG, acospi,
38  [](auto x) -> decltype(x) { return std::acos(x) / M_PI; })
39 BUILTIN_GENF(ONE_ARG, asin)
40 BUILTIN_GENF(ONE_ARG, asinh)
42  [](auto x) -> decltype(x) { return std::asin(x) / M_PI; })
43 BUILTIN_GENF(ONE_ARG, atan)
44 BUILTIN_GENF(ONE_ARG, atanh)
45 BUILTIN_GENF_CUSTOM(ONE_ARG, atanpi,
46  [](auto x) -> decltype(x) { return std::atan(x) / M_PI; })
47 BUILTIN_GENF(TWO_ARGS, atan2)
48 BUILTIN_GENF_CUSTOM(TWO_ARGS, atan2pi, [](auto x, auto y) -> decltype(x) {
49  return std::atan2(x, y) / M_PI;
50 })
51 BUILTIN_GENF(ONE_ARG, cbrt)
52 BUILTIN_GENF(ONE_ARG, ceil)
53 BUILTIN_GENF(TWO_ARGS, copysign)
54 BUILTIN_GENF(ONE_ARG, cos)
55 BUILTIN_GENF(ONE_ARG, cosh)
56 BUILTIN_GENF_CUSTOM(ONE_ARG, cospi, [](auto x) -> decltype(x) {
57  return std::sin(M_PI * (0.5 - x));
58 })
59 BUILTIN_GENF(ONE_ARG, erf)
60 BUILTIN_GENF(ONE_ARG, erfc)
61 BUILTIN_GENF(ONE_ARG, exp)
62 BUILTIN_GENF(ONE_ARG, exp2)
64  [](auto x) -> decltype(x) { return std::pow(10, x); })
65 BUILTIN_GENF(ONE_ARG, expm1)
66 BUILTIN_GENF(ONE_ARG, fabs)
67 BUILTIN_GENF(TWO_ARGS, fdim)
68 BUILTIN_GENF(ONE_ARG, floor)
69 BUILTIN_GENF(THREE_ARGS, fma)
70 BUILTIN_GENF(TWO_ARGS, fmax)
71 BUILTIN_GENF(TWO_ARGS, fmin)
72 BUILTIN_GENF(TWO_ARGS, fmod)
73 BUILTIN_GENF(TWO_ARGS, hypot)
74 BUILTIN_GENF(ONE_ARG, lgamma)
75 BUILTIN_GENF(ONE_ARG, log)
76 BUILTIN_GENF(ONE_ARG, log2)
77 BUILTIN_GENF(ONE_ARG, log10)
78 BUILTIN_GENF(ONE_ARG, log1p)
79 BUILTIN_GENF(ONE_ARG, logb)
80 BUILTIN_GENF_CUSTOM(THREE_ARGS, mad, [](auto x, auto y, auto z) -> decltype(x) {
81  return (x * y) + z;
82 })
83 BUILTIN_GENF_CUSTOM(TWO_ARGS, maxmag, [](auto x, auto y) -> decltype(x) {
84  if (std::fabs(x) > std::fabs(y))
85  return x;
87  return y;
88  return std::fmax(x, y);
89 })
90 BUILTIN_GENF_CUSTOM(TWO_ARGS, minmag, [](auto x, auto y) -> decltype(x) {
91  if (std::fabs(x) < std::fabs(y))
92  return x;
93  if (std::fabs(y) < std::fabs(x))
94  return y;
95  return std::fmin(x, y);
96 })
97 BUILTIN_GENF(TWO_ARGS, pow)
98 BUILTIN_GENF_CUSTOM(TWO_ARGS, powr, [](auto x, auto y) -> decltype(x) {
99  using T = decltype(x);
100  return (x >= T(0)) ? T(std::pow(x, y)) : x;
101 })
102 BUILTIN_GENF(TWO_ARGS, remainder)
103 BUILTIN_GENF(ONE_ARG, rint)
104 BUILTIN_GENF(ONE_ARG, round)
105 BUILTIN_GENF_CUSTOM(ONE_ARG, rsqrt, [](auto x) -> decltype(x) {
106  return decltype(x){1.0} / std::sqrt(x);
107 })
108 BUILTIN_GENF(ONE_ARG, sin)
109 BUILTIN_GENF(ONE_ARG, sinh)
111  [](auto x) -> decltype(x) { return std::sin(M_PI * x); })
112 BUILTIN_GENF(ONE_ARG, sqrt)
113 BUILTIN_GENF(ONE_ARG, tan)
114 BUILTIN_GENF(ONE_ARG, tanh)
116  ONE_ARG, tanpi,
117  [](auto x) -> decltype(x) { // For uniformity, place in range [0.0, 1.0).
118  double y = x - std::floor(x);
119  // Flip for better accuracy.
120  return 1.0 / std::tan((0.5 - y) * M_PI);
121  })
122 BUILTIN_GENF(ONE_ARG, tgamma)
123 BUILTIN_GENF(ONE_ARG, trunc)
124 BUILTIN_GENF_CUSTOM(TWO_ARGS, nextafter, [](auto x, auto y) {
125  if constexpr (!std::is_same_v<decltype(x), half>) {
126  return std::nextafter(x, y);
127  } else {
128  // Copied from sycl_host_nextafter, not sure if it's valid when operating on
129  // sycl::half. That said, should be covered by
130  // sycl/test/regression/host_half_nextafter.cpp
131 
132  if (std::isnan(static_cast<float>(x)))
133  return x;
134  if (std::isnan(static_cast<float>(y)) || x == y)
135  return y;
136 
137  uint16_t x_bits = sycl::bit_cast<uint16_t>(x);
138  uint16_t x_sign = x_bits & 0x8000;
139  int16_t movement = (x > y ? -1 : 1) * (x_sign ? -1 : 1);
140  if (x_bits == x_sign && movement == -1) {
141  // Special case where we underflow in the decrement, in which case we turn
142  // it around and flip the sign. The overflow case does not need special
143  // handling.
144  movement = 1;
145  x_bits ^= 0x8000;
146  }
147  x_bits += movement;
148  return sycl::bit_cast<half>(x_bits);
149  }
150 })
151 
152 namespace detail {
153 __SYCL_EXPORT float frexp_impl(float x, int *p) { return std::frexp(x, p); }
154 __SYCL_EXPORT double frexp_impl(double x, int *p) { return std::frexp(x, p); }
155 __SYCL_EXPORT half frexp_impl(half x, int *p) { return std::frexp(x, p); }
156 } // namespace detail
157 
158 namespace detail {
/// Computes log(|gamma(x)|) and reports the sign of gamma(x) through \p signp.
///
/// The value comes from std::lgamma rather than log(|tgamma(x)|): tgamma
/// overflows (or underflows) long before lgamma does, which would turn a
/// finite result into +/-infinity.
///
/// \param x      argument of the gamma function.
/// \param signp  receives -1 when gamma(x) is negative and 1 otherwise
///               (including NaN input and the poles at negative integers).
/// \return log of the absolute value of gamma(x).
template <typename T> static inline T __lgamma_r_impl(T x, int *signp) {
  int sign = 1;
  if (x < T(0)) {
    // For a negative non-integer x, gamma(x) is negative exactly when
    // floor(x) is odd (e.g. x in (-1, 0), (-3, -2), ...).
    const T fl = std::floor(x);
    if (fl != x && std::fmod(fl, T(2)) != 0)
      sign = -1;
  } else if (x == T(0) && T(1) / x < T(0)) {
    // x is negative zero: gamma(-0) is -infinity.
    sign = -1;
  }
  *signp = sign;
  // NOTE(review): on POSIX systems std::lgamma may also store the sign in the
  // global `signgam`; this file already calls std::lgamma for sycl::lgamma.
  return std::lgamma(x);
}
164 
165 __SYCL_EXPORT float lgamma_r_impl(float x, int *p) {
166  return __lgamma_r_impl(x, p);
167 }
168 __SYCL_EXPORT double lgamma_r_impl(double x, int *p) {
169  return __lgamma_r_impl(x, p);
170 }
171 __SYCL_EXPORT half lgamma_r_impl(half x, int *p) {
172  return __lgamma_r_impl(x, p);
173 }
174 } // namespace detail
175 
176 HOST_IMPL(ilogb, std::ilogb)
177 EXPORT_SCALAR_AND_VEC_1_16(ONE_ARG, ilogb, FP_TYPES)
178 
179 namespace detail {
180 __SYCL_EXPORT float modf_impl(float x, float *p) { return std::modf(x, p); }
181 __SYCL_EXPORT double modf_impl(double x, double *p) { return std::modf(x, p); }
182 __SYCL_EXPORT half modf_impl(half x, half *p) {
183  float val;
184  auto ret = std::modf(x, &val);
185  *p = val;
186  return ret;
187 }
188 } // namespace detail
189 
190 namespace detail {
191 template <typename T> static inline T __sincos(T x, T *cosval) {
192  (*cosval) = std::cos(x);
193  return std::sin(x);
194 }
195 
196 __SYCL_EXPORT float sincos_impl(float x, float *p) { return __sincos(x, p); }
197 __SYCL_EXPORT double sincos_impl(double x, double *p) { return __sincos(x, p); }
198 __SYCL_EXPORT half sincos_impl(half x, half *p) { return __sincos(x, p); }
199 } // namespace detail
200 
201 #define EXPORT_VEC_LAST_INT(NAME, TYPE, VL) \
202  vec<TYPE, VL> __SYCL_EXPORT __##NAME##_impl(vec<TYPE, VL> x, \
203  vec<int, VL> y) { \
204  return NAME##_host_impl(x, y); \
205  }
206 #define EXPORT_VEC_LAST_INT_1_16(NAME, TYPE) \
207  FOR_VEC_1_16(EXPORT_VEC_LAST_INT, NAME, TYPE)
208 
209 #define BUILTIN_MATH_LAST_INT(NAME, IMPL) \
210  __SYCL_EXPORT float __##NAME##_impl(float x, int y) { return IMPL(x, y); } \
211  __SYCL_EXPORT double __##NAME##_impl(double x, int y) { return IMPL(x, y); } \
212  __SYCL_EXPORT half __##NAME##_impl(half x, int y) { return IMPL(x, y); } \
213  HOST_IMPL(NAME, NAME /* delegate to scalar */) \
214  FOR_EACH1(EXPORT_VEC_LAST_INT_1_16, NAME, FP_TYPES)
215 
217 BUILTIN_MATH_LAST_INT(rootn, [](auto x, auto y) -> decltype(x) {
218  return std::pow(x, decltype(x){1} / y);
219 })
220 BUILTIN_MATH_LAST_INT(ldexp, std::ldexp)
221 
namespace {
/// Computes the IEEE remainder of x / y and stores the rounded integral
/// quotient in \p z.
///
/// \param x  dividend.
/// \param y  divisor.
/// \param z  receives round((x - rem) / y). When that quotient does not fit in
///           an int it is reduced modulo 2^(bits-1) keeping its sign; when it
///           is not finite (e.g. y == 0 or NaN input) 0 is stored. Converting
///           such values directly to int would be undefined behaviour.
/// \return std::remainder(x, y).
template <typename T> auto __remquo_impl(T x, T y, int *z) {
  T rem = std::remainder(x, y);
  const double quot = static_cast<double>(std::round((x - rem) / y));
  // 2^(bits-1): one past the largest int magnitude, computed without <limits>.
  const double span = std::ldexp(1.0, static_cast<int>(sizeof(int)) * 8 - 1);
  if (quot >= -span && quot < span) {
    // Common case: representable quotient, identical to a plain conversion.
    *z = static_cast<int>(quot);
  } else if (std::isfinite(quot)) {
    // Out of range: keep the sign and the low-order bits of the magnitude.
    *z = static_cast<int>(std::fmod(quot, span));
  } else {
    // NaN or infinite quotient: nothing meaningful to report.
    *z = 0;
  }
  return rem;
}
} // namespace
229 namespace detail {
230 __SYCL_EXPORT float remquo_impl(float x, float y, int *z) {
231  return __remquo_impl(x, y, z);
232 }
233 __SYCL_EXPORT double remquo_impl(double x, double y, int *z) {
234  return __remquo_impl(x, y, z);
235 }
236 __SYCL_EXPORT half remquo_impl(half x, half y, int *z) {
237  return __remquo_impl(x, y, z);
238 }
239 } // namespace detail
240 } // namespace _V1
241 } // namespace sycl
__ESIMD_API simd< T, N > sqrt(simd< T, N > src, Sat sat={})
Square root.
Definition: math.hpp:388
__ESIMD_API simd< T, N > rsqrt(simd< T, N > src, Sat sat={})
Square root reciprocal - calculates 1/sqrt(x).
Definition: math.hpp:396
__ESIMD_API simd< T, N > pow(simd< T, N > src0, simd< U, N > src1, Sat sat={})
Power - calculates src0 in power of src1.
Definition: math.hpp:441
__ESIMD_API simd< T, N > log2(simd< T, N > src, Sat sat={})
Logarithm base 2.
Definition: math.hpp:380
__ESIMD_API simd< T, N > exp2(simd< T, N > src, Sat sat={})
Exponent base 2.
Definition: math.hpp:384
__ESIMD_API std::enable_if_t< std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > acos(sycl::ext::intel::esimd::simd< T, SZ > src0)
Definition: math.hpp:1298
ESIMD_DETAIL __ESIMD_API sycl::ext::intel::esimd::simd< T, SZ > atan(sycl::ext::intel::esimd::simd< T, SZ > src0)
Definition: math.hpp:1254
sycl::ext::intel::esimd::simd< float, N > fmod(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
Definition: math.hpp:1479
__ESIMD_API std::enable_if_t< std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > asin(sycl::ext::intel::esimd::simd< T, SZ > src0)
Definition: math.hpp:1336
sycl::ext::intel::esimd::simd< float, N > atan2(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
Definition: math.hpp:1446
ESIMD_DETAIL __ESIMD_API std::enable_if_t< !std::is_same< std::remove_const_t< TRes >, std::remove_const_t< TArg > >::value, simd< TRes, SZ > > abs(simd< TArg, SZ > src0)
Get absolute value (vector version)
Definition: math.hpp:126
#define FP_TYPES
#define EXPORT_SCALAR_AND_VEC_1_16(NUM_ARGS, NAME,...)
#define HOST_IMPL(NAME,...)
#define BUILTIN_GENF(NUM_ARGS, NAME)
#define BUILTIN_MATH_LAST_INT(NAME, IMPL)
T cast_if_host_half(T val)
Definition: half_type.hpp:561
std::enable_if_t< std::is_same_v< Tp, float >, float > copysign(Tp x, Tp y)
Definition: math.hpp:74
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > atanh(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > tan(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > cosh(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > asinh(const complex< _Tp > &__x)
std::enable_if_t< std::is_same_v< T, bfloat16 >, bool > isnan(T x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > acosh(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > log10(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > sinh(const complex< _Tp > &__x)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fabs(T x)
float ceil(float)
return(x >=T(0)) ? T(std sinpi
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > log(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
BUILTIN_GENF_CUSTOM(ONE_ARG, acospi, [](auto x) -> decltype(x) { return std::acos(x)/M_PI;}) BUILTIN_GENF_CUSTOM(ONE_ARG
auto auto autodecltype(x) z
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > sin(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > cos(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
float floor(float)
float rint(float)
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > exp(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
sycl::detail::half_impl::half half
Definition: aliases.hpp:101
autodecltype(x) x
float trunc(float)
Definition: access.hpp:18