DPC++ Runtime
Runtime libraries for oneAPI DPC++
math_functions.inc
Go to the documentation of this file.
1 //==------------------- math_functions.inc ---------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Intentionally insufficient set of includes and no "#pragma once".
10 
12 
13 namespace sycl {
14 inline namespace _V1 {
15 namespace detail {
// Compile-time predicate over a parameter pack, passed as the extra condition
// to BUILTIN_CREATE_ENABLER(builtin_enable_last_int, ...) below.
// `value` is true when all of the following hold:
//  - the first type passes builtin_same_shape_v,
//  - every type except the last satisfies
//    builtin_same_or_swizzle_v<first_type, T>,
//  - the last type passes builtin_same_shape_v and its element type is exactly
//    int_type (int32_t when the first type is a vec/swizzle, int otherwise).
16 template <typename... Ts> struct last_int_rest_same {
17  static constexpr bool value = []() constexpr {
18  constexpr auto N = sizeof...(Ts);
19  using first_type = typename first_type<Ts...>::type;
20  if (!builtin_same_shape_v<first_type>)
21  return false;
// 1-based position counter: the fold expression below increments it once per
// pack element, so `++i == N` is true only for the last type in the pack.
22  int i = 0;
23  using int_type =
24  std::conditional_t<is_vec_or_swizzle_v<first_type>, int32_t, int>;
25  return (
26  (... &&
27  (++i == N
28  ? /* last */ builtin_same_shape_v<Ts> // filter out "bad" types,
29  // e.g. multi-ptr
30  && std::is_same_v<get_elem_type_t<Ts>, int_type>
31  : /* not last */ builtin_same_or_swizzle_v<first_type, Ts>)));
32  }();
33 };
// Compile-time predicate over a parameter pack, passed as the extra condition
// to the builtin_enable_last_intptr_{scalar,non_scalar} enablers below.
// `value` is true when all of the following hold:
//  - the first type passes builtin_same_shape_v,
//  - every type except the last satisfies
//    builtin_same_or_swizzle_v<first_type, T>,
//  - the last type is a raw pointer, or a multi_ptr for which
//    has_writeable_addr_space_v is true,
//  - the pointee (get_elem_type_t of the last type) satisfies
//    builtin_same_shape_v together with the first type and is not a swizzle,
//  - the pointee's element type is exactly int_type (int32_t when the first
//    type is a vec/swizzle, int otherwise).
34 template <typename... Ts> struct last_intptr_rest_same {
35  static constexpr bool value = []() constexpr {
36  constexpr auto N = sizeof...(Ts);
37  using first_type = typename first_type<Ts...>::type;
38  if (!builtin_same_shape_v<first_type>)
39  return false;
// 1-based position counter: the fold expression below increments it once per
// pack element, so `++i == N` is true only for the last type in the pack.
40  int i = 0;
41  using int_type =
42  std::conditional_t<is_vec_or_swizzle_v<first_type>, int32_t, int>;
43  return (
44  (... &&
45  (++i == N
46  ? /* last */ (
47  std::is_pointer_v<Ts> ||
48  (is_multi_ptr_v<Ts> && has_writeable_addr_space_v<Ts>)) &&
49  builtin_same_shape_v<first_type, get_elem_type_t<Ts>> &&
50  !is_swizzle_v<get_elem_type_t<Ts>> &&
51  std::is_same_v<get_elem_type_t<get_elem_type_t<Ts>>,
52  int_type>
53  : /* not last */ builtin_same_or_swizzle_v<first_type, Ts>)));
54  }();
55 };
56 } // namespace detail
// Enablers for the math builtins in this file. The macro arguments are, in
// order: enabler name, return type trait, element type checker, shape checker
// and extra conditions. The rest of the file refers to the results as
// detail::<name>_t (e.g. builtin_enable_math_t), so the macro presumably
// generates those aliases -- confirm against its definition in builtins.hpp.
57 BUILTIN_CREATE_ENABLER(builtin_enable_math, default_ret_type, fp_elem_type,
58  non_scalar_only, same_elem_type)
59 BUILTIN_CREATE_ENABLER(builtin_enable_math_allow_scalar, default_ret_type,
60  fp_elem_type, any_shape, same_elem_type)
61 BUILTIN_CREATE_ENABLER(builtin_enable_last_int, default_ret_type, fp_elem_type,
62  non_scalar_only, last_int_rest_same)
63 BUILTIN_CREATE_ENABLER(builtin_enable_last_intptr_scalar, default_ret_type,
64  fp_elem_type, scalar_only, last_intptr_rest_same)
65 BUILTIN_CREATE_ENABLER(builtin_enable_last_intptr_non_scalar, default_ret_type,
66  fp_elem_type, non_scalar_only, last_intptr_rest_same)
67 
68 namespace detail {
69 // FIXME: get rid of these.
// Boolean form of a math enabler, used by builtin_enable_ptr_v and
// builtin_enable_ilogb_t further down. As visible here it checks, via
// check_type_in_v, that the element type of the first type in the pack is one
// of float, double or half.
// NOTE(review): this listing jumps from source line 71 to 73, so one line of
// the initializer may be missing here -- confirm against the original file.
70 template <typename... Ts>
71 inline constexpr bool builtin_enable_math_allow_scalar_v =
73  check_type_in_v<get_elem_type_t<typename first_type<Ts...>::type>, float,
74  double, half>;
75 } // namespace detail
76 
77 #ifdef __SYCL_DEVICE_ONLY__
// BUILTIN_GENF_DEVICE_COMMON(NUM_ARGS, NAME, VEC_IMPL) -- device only.
// Defines the scalar double and half overloads of NAME, both forwarding to
// __spirv_ocl_<NAME> (the half overload passes NUM_ARGS##_CONVERTED_ARG, the
// double overload NUM_ARGS##_ARG), and then expands DEVICE_IMPL_TEMPLATE with
// builtin_enable_math_t and VEC_IMPL for the templated overload. The scalar
// float overload is supplied by the macros that use this one.
78 // Common between generic case and fast math optimized path. Note that vector
79 // case is template with a single implementation between all three types, so we
80 // have to introduce this VEC_IMPL parameter to be able to use native version
81 // for floatN.
82 #define BUILTIN_GENF_DEVICE_COMMON(NUM_ARGS, NAME, VEC_IMPL) \
83  inline double NAME(NUM_ARGS##_TYPE_ARG(double)) { \
84  return __spirv_ocl_##NAME(NUM_ARGS##_ARG); \
85  } \
86  inline half NAME(NUM_ARGS##_TYPE_ARG(half)) { \
87  return __spirv_ocl_##NAME(NUM_ARGS##_CONVERTED_ARG); \
88  } \
89  DEVICE_IMPL_TEMPLATE(NUM_ARGS, NAME, builtin_enable_math_t, VEC_IMPL)
90 
// BUILTIN_GENF(NUM_ARGS, NAME) -- device variant.
// Defines the scalar float overload of NAME forwarding to __spirv_ocl_<NAME>,
// then the common double/half/template overloads with __spirv_ocl_<NAME> as
// the vector implementation.
91 #define BUILTIN_GENF(NUM_ARGS, NAME) \
92  inline float NAME(NUM_ARGS##_TYPE_ARG(float)) { \
93  return __spirv_ocl_##NAME(NUM_ARGS##_ARG); \
94  } \
95  BUILTIN_GENF_DEVICE_COMMON(NUM_ARGS, NAME, __spirv_ocl_##NAME)
96 
// BUILTIN_GENF_NATIVE_OPT(NUM_ARGS, NAME) -- device variant.
// Like BUILTIN_GENF, but routes the float overload and the templated overload
// through detail::maybe_fmf_<NAME>, which selects at compile time between
// __spirv_ocl_native_<NAME> (when use_fast_math_v<T> is true) and
// __spirv_ocl_<NAME>. The double and half scalar overloads still come from
// BUILTIN_GENF_DEVICE_COMMON and call __spirv_ocl_<NAME> directly.
97 #define BUILTIN_GENF_NATIVE_OPT(NUM_ARGS, NAME) \
98  namespace detail { \
99  template <typename T> \
100  decltype(auto) maybe_fmf_##NAME(NUM_ARGS##_TYPE_ARG(T)) { \
101  if constexpr (use_fast_math_v<T>) { \
102  return __spirv_ocl_native_##NAME(NUM_ARGS##_ARG); \
103  } else { \
104  return __spirv_ocl_##NAME(NUM_ARGS##_ARG); \
105  } \
106  } \
107  } \
108  inline float NAME(NUM_ARGS##_TYPE_ARG(float)) { \
109  return detail::maybe_fmf_##NAME(NUM_ARGS##_CONVERTED_ARG); \
110  } \
111  BUILTIN_GENF_DEVICE_COMMON(NUM_ARGS, NAME, detail::maybe_fmf_##NAME)
112 
113 #else
// BUILTIN_GENF(NUM_ARGS, NAME) -- host variant.
// Expands HOST_IMPL_SCALAR for each entry of FP_TYPES (via FOR_EACH2) and then
// HOST_IMPL_TEMPLATE with builtin_enable_math_t, the `math` function class and
// default_ret_type. Both helper macros are defined outside this file.
114 #define BUILTIN_GENF(NUM_ARGS, NAME) \
115  FOR_EACH2(HOST_IMPL_SCALAR, NUM_ARGS, NAME, FP_TYPES) \
116  HOST_IMPL_TEMPLATE(NUM_ARGS, NAME, builtin_enable_math_t, math, \
117  default_ret_type)
118 
119 // Optimization only affects device code.
// On host the "native" variant is therefore just an alias for BUILTIN_GENF.
120 #define BUILTIN_GENF_NATIVE_OPT(NUM_ARGS, NAME) BUILTIN_GENF(NUM_ARGS, NAME)
121 #endif
122 
// Instantiate the generic floating-point builtins that have no fast-math
// "native" path. The first argument (ONE_ARG / TWO_ARGS / THREE_ARGS) is the
// prefix BUILTIN_GENF pastes onto the arity helper macros (e.g.
// ONE_ARG_TYPE_ARG); the second is the builtin's name.
123 BUILTIN_GENF(ONE_ARG, acos)
124 BUILTIN_GENF(ONE_ARG, acosh)
125 BUILTIN_GENF(ONE_ARG, acospi)
126 BUILTIN_GENF(ONE_ARG, asin)
127 BUILTIN_GENF(ONE_ARG, asinh)
128 BUILTIN_GENF(ONE_ARG, asinpi)
129 BUILTIN_GENF(ONE_ARG, atan)
130 BUILTIN_GENF(ONE_ARG, atanh)
131 BUILTIN_GENF(ONE_ARG, atanpi)
132 BUILTIN_GENF(ONE_ARG, cbrt)
133 BUILTIN_GENF(ONE_ARG, ceil)
134 BUILTIN_GENF(ONE_ARG, cosh)
135 BUILTIN_GENF(ONE_ARG, cospi)
136 BUILTIN_GENF(ONE_ARG, erf)
137 BUILTIN_GENF(ONE_ARG, erfc)
138 BUILTIN_GENF(ONE_ARG, expm1)
139 BUILTIN_GENF(ONE_ARG, fabs)
140 BUILTIN_GENF(ONE_ARG, floor)
141 BUILTIN_GENF(ONE_ARG, lgamma)
142 BUILTIN_GENF(ONE_ARG, log1p)
143 BUILTIN_GENF(ONE_ARG, logb)
144 BUILTIN_GENF(ONE_ARG, rint)
145 BUILTIN_GENF(ONE_ARG, round)
146 BUILTIN_GENF(ONE_ARG, sinh)
147 BUILTIN_GENF(ONE_ARG, sinpi)
148 BUILTIN_GENF(ONE_ARG, tanh)
149 BUILTIN_GENF(ONE_ARG, tanpi)
150 BUILTIN_GENF(ONE_ARG, tgamma)
151 BUILTIN_GENF(ONE_ARG, trunc)
152 BUILTIN_GENF(TWO_ARGS, atan2)
153 BUILTIN_GENF(TWO_ARGS, atan2pi)
154 BUILTIN_GENF(TWO_ARGS, copysign)
155 BUILTIN_GENF(TWO_ARGS, fdim)
156 BUILTIN_GENF(TWO_ARGS, fmod)
157 BUILTIN_GENF(TWO_ARGS, hypot)
158 BUILTIN_GENF(TWO_ARGS, maxmag)
159 BUILTIN_GENF(TWO_ARGS, minmag)
160 BUILTIN_GENF(TWO_ARGS, nextafter)
161 BUILTIN_GENF(TWO_ARGS, pow)
162 BUILTIN_GENF(TWO_ARGS, remainder)
163 BUILTIN_GENF(THREE_ARGS, fma)
164 BUILTIN_GENF(THREE_ARGS, mad)
165 
// BUILTIN_GENF_SCALAR_2ND(NAME): generates the regular two-argument overloads
// of NAME via BUILTIN_GENF and adds one more template overload whose second
// argument is a scalar of T's element type. That overload converts both
// arguments to simplify_if_swizzle_t<T> and forwards to the two-argument NAME.
// It is enabled through builtin_enable_math_t<T>. Used for fmax and fmin only
// and undefined right after.
166 #define BUILTIN_GENF_SCALAR_2ND(NAME) \
167  BUILTIN_GENF(TWO_ARGS, NAME) \
168  template <typename T> \
169  detail::builtin_enable_math_t<T> NAME(T x, detail::get_elem_type_t<T> y) { \
170  return NAME(detail::simplify_if_swizzle_t<T>{x}, \
171  detail::simplify_if_swizzle_t<T>{y}); \
172  }
173 
174 BUILTIN_GENF_SCALAR_2ND(fmax)
175 BUILTIN_GENF_SCALAR_2ND(fmin)
176 
177 #undef BUILTIN_GENF_SCALAR_2ND
178 
// Builtins that get the fast-math "native" path on device (see the device
// definition of BUILTIN_GENF_NATIVE_OPT); on host these expand to plain
// BUILTIN_GENF. The generator macros are undefined afterwards.
179 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, cos)
180 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, exp)
181 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, exp10)
182 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, exp2)
183 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, log)
184 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, log10)
185 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, log2)
186 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, rsqrt)
187 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, sin)
188 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, sqrt)
189 BUILTIN_GENF_NATIVE_OPT(ONE_ARG, tan)
190 BUILTIN_GENF_NATIVE_OPT(TWO_ARGS, powr)
191 
192 #undef BUILTIN_GENF_NATIVE_OPT
193 #undef BUILTIN_GENF
194 #undef BUILTIN_GENF_DEVICE_COMMON
195 
196 namespace detail {
// Checks the trailing pointer argument T1 of a builtin whose first argument is
// T0: T1 must be a raw pointer or a multi_ptr with a writeable address space,
// and is_valid_elem_type_v<T1, simplify_if_swizzle_t<T0>> must hold.
197 template <typename T0, typename T1>
198 inline constexpr bool builtin_ptr_check_v =
199  (std::is_pointer_v<T1> ||
200  (is_multi_ptr_v<T1> && has_writeable_addr_space_v<T1>)) &&
201  is_valid_elem_type_v<T1, simplify_if_swizzle_t<T0>>;
202 
// Combined condition: T0 passes builtin_enable_math_allow_scalar_v and the
// (T0, T1) pair passes builtin_ptr_check_v.
203 template <typename T0, typename T1>
204 inline constexpr bool builtin_enable_ptr_v =
205  builtin_enable_math_allow_scalar_v<T0> && builtin_ptr_check_v<T0, T1>;
206 
// SFINAE enabler for the scalar overloads: yields T0 when builtin_enable_ptr_v
// holds and T0 is a scalar arithmetic type.
207 template <typename T0, typename T1>
208 using builtin_enable_ptr_scalar_t =
209  std::enable_if_t<builtin_enable_ptr_v<T0, T1> && is_scalar_arithmetic_v<T0>,
210  T0>;
211 
// SFINAE enabler for the non-scalar overloads: yields simplify_if_swizzle_t<T0>
// when builtin_enable_ptr_v holds and T0 is not a scalar arithmetic type.
212 template <typename T0, typename T1>
213 using builtin_enable_ptr_non_scalar_t =
214  std::enable_if_t<builtin_enable_ptr_v<T0, T1> &&
215  !is_scalar_arithmetic_v<T0>,
216  simplify_if_swizzle_t<T0>>;
217 
// Element-wise fallback for builtins that take a trailing pointer argument.
//
// F  - callable invoked once per element as F(xs[i]..., p0 + i).
// p  - pointer (raw or multi_ptr) to the non-scalar output object; p0 is the
//      address of its first element.
// xs - the non-pointer arguments; the first one determines the result type
//      and the element count (T0::size()).
//
// Returns a simplify_if_swizzle_t<T0> whose element i is the value returned by
// the i-th call of F.
218 template <typename FuncTy, typename PtrTy, typename... Ts>
219 auto builtin_delegate_ptr_impl(FuncTy F, PtrTy p, Ts... xs) {
220  using T0 = typename first_type<Ts...>::type;
221  // Simplify just in case, although most callers seem to do that on their own.
222  simplify_if_swizzle_t<T0> r{};
223 
224  // TODO: Optimize for sizes. Make sure not to violate ANSI-aliasing rules
225  // for the pointer argument.
// For a multi_ptr, re-wrap the element address with the same address space and
// decoration as PtrTy; for a raw pointer take the element address directly.
226  auto p0 = [&]() {
227  if constexpr (is_multi_ptr_v<PtrTy>)
228  return address_space_cast<PtrTy::address_space,
229  get_multi_ptr_decoration_v<PtrTy>>(&(*p)[0]);
230  else
231  return &(*p)[0];
232  }();
233 
// Up to 16 elements go through the loop<N> helper (declared in helpers.hpp,
// presumably a compile-time unrolled loop -- confirm there); larger sizes use
// an ordinary runtime loop.
234  constexpr auto N = T0::size();
235  if constexpr (N <= 16)
236  loop<N>([&](auto i) { r[i] = F(xs[i]..., p0 + i); });
237  else
238  for (size_t i = 0; i < N; ++i)
239  r[i] = F(xs[i]..., p0 + i);
240  return r;
241 }
242 } // namespace detail
243 
// LAST_PTR_SCALAR(NUM_ARGS, NAME, SCALAR_ENABLER, TYPE): defines one public
// overload of NAME for the scalar type TYPE. It takes LESS_ONE(NUM_ARGS)
// arguments of TYPE followed by a pointer of deduced type PtrTy, is enabled
// through detail::SCALAR_ENABLER<TYPE, PtrTy>, and forwards everything to
// detail::NAME##_impl.
244 #define LAST_PTR_SCALAR(NUM_ARGS, NAME, SCALAR_ENABLER, TYPE) \
245  template <typename PtrTy> \
246  detail::SCALAR_ENABLER<TYPE, PtrTy> NAME( \
247  SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TYPE_ARG)(TYPE), PtrTy p) { \
248  return detail::NAME##_impl(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _ARG), p); \
249  }
// BUILTIN_LAST_PTR_COMMON(NUM_ARGS, NAME, SCALAR_ENABLER, NON_SCALAR_ENABLER):
// public entry points for a builtin whose last argument is a pointer. Expands
// LAST_PTR_SCALAR for every entry of FP_TYPES (via FOR_EACH3) and adds one
// fully templated overload enabled through detail::NON_SCALAR_ENABLER. All of
// them forward to detail::NAME##_impl, which must already be declared.
250 #define BUILTIN_LAST_PTR_COMMON(NUM_ARGS, NAME, SCALAR_ENABLER, \
251  NON_SCALAR_ENABLER) \
252  FOR_EACH3(LAST_PTR_SCALAR, NUM_ARGS, NAME, SCALAR_ENABLER, FP_TYPES) \
253  template <SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TYPENAME_TYPE), typename PtrTy> \
254  detail::NON_SCALAR_ENABLER<SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TEMPLATE_TYPE), \
255  PtrTy> \
256  NAME(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TEMPLATE_TYPE_ARG), PtrTy p) { \
257  return detail::NAME##_impl(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _ARG), p); \
258  }
259 
// BUILTIN_LAST_PTR(NUM_ARGS, NAME, SCALAR_ENABLER, NON_SCALAR_ENABLER).
// Device: also defines detail::NAME##_impl. When T0 is an marray it delegates
// element-wise through builtin_delegate_ptr_impl; otherwise it calls
// __spirv_ocl_<NAME> with the converted arguments.
// Host: only emits the public overloads; detail::NAME##_impl has to be
// provided separately before the macro is used.
// NOTE(review): this switch uses `#if __SYCL_DEVICE_ONLY__` while the other
// device/host switches in this file use `#ifdef` / `#ifndef` -- consider
// aligning them.
260 #if __SYCL_DEVICE_ONLY__
261 #define BUILTIN_LAST_PTR(NUM_ARGS, NAME, SCALAR_ENABLER, NON_SCALAR_ENABLER) \
262  namespace detail { \
263  template <NUM_ARGS##_TYPENAME_TYPE> \
264  auto NAME##_impl(NUM_ARGS##_TEMPLATE_TYPE_ARG_REF) { \
265  if constexpr (is_marray_v<T0>) { \
266  return builtin_delegate_ptr_impl( \
267  [](auto... xs) { return NAME##_impl(xs...); }, \
268  NUM_ARGS##_ARG_ROTATED); \
269  } else { \
270  return __spirv_ocl_##NAME(NUM_ARGS##_CONVERTED_ARG); \
271  } \
272  } \
273  } /* namespace detail */ \
274  BUILTIN_LAST_PTR_COMMON(NUM_ARGS, NAME, SCALAR_ENABLER, NON_SCALAR_ENABLER)
275 #else
276 #define BUILTIN_LAST_PTR(NUM_ARGS, NAME, SCALAR_ENABLER, NON_SCALAR_ENABLER) \
277  BUILTIN_LAST_PTR_COMMON(NUM_ARGS, NAME, SCALAR_ENABLER, NON_SCALAR_ENABLER)
278 #endif
279 
280 namespace detail {
// Raw pointer type matching a trailing "int pointer" argument for a first
// argument of type T: T is simplified if it is a swizzle, its elements are
// replaced by int (when T is an marray) or int32_t (otherwise), and the result
// is turned into a pointer.
281 template <typename T>
282 using builtin_last_raw_intptr_t =
283  // FIXME: Should we allow marray here, or limit just to vec/swizzle/ scalar?
284  // If not, "enabler" has to be changed as well.
285  change_elements_t<std::conditional_t<is_marray_v<T>, int, int32_t>,
286  simplify_if_swizzle_t<T>> *;
287 }
288 
// BUILTIN_LAST_INTPTR(NUM_ARGS, NAME): builtins whose last argument is a
// pointer to int (or to a vector/marray of ints).
// Device: plain BUILTIN_LAST_PTR with the last_intptr enablers.
// Host: first declares the exported scalar detail::NAME##_impl overloads, one
// per entry of FP_TYPES, each taking `int *` last. It then defines a templated
// detail::NAME##_impl that unwraps a multi_ptr with get_raw() and otherwise
// delegates element-wise through builtin_delegate_ptr_impl. Finally it emits
// the public overloads via BUILTIN_LAST_PTR.
289 #ifdef __SYCL_DEVICE_ONLY__
290 #define BUILTIN_LAST_INTPTR(NUM_ARGS, NAME) \
291  BUILTIN_LAST_PTR(NUM_ARGS, NAME, builtin_enable_last_intptr_scalar_t, \
292  builtin_enable_last_intptr_non_scalar_t)
293 #else
294 #define LAST_INT_PTR_DECLARE_SCALAR(NUM_ARGS, NAME, TYPE) \
295  __SYCL_EXPORT TYPE NAME##_impl(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TYPE)(TYPE), \
296  int *);
297 #define BUILTIN_LAST_INTPTR(NUM_ARGS, NAME) \
298  namespace detail { \
299  FOR_EACH2(LAST_INT_PTR_DECLARE_SCALAR, NUM_ARGS, NAME, FP_TYPES) \
300  template <SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TYPENAME_TYPE), typename PtrTy> \
301  auto NAME##_impl(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TEMPLATE_TYPE_ARG_REF), \
302  PtrTy p) { \
303  if constexpr (is_multi_ptr_v<PtrTy>) { \
304  /* TODO: Can't really create multi_ptr on host... */ \
305  return NAME##_impl(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _ARG), p.get_raw()); \
306  } else { \
307  return builtin_delegate_ptr_impl( \
308  [](auto... xs) { return NAME##_impl(xs...); }, p, \
309  SYCL_CONCAT(LESS_ONE(NUM_ARGS), _SIMPLIFIED_ARG)); \
310  } \
311  } \
312  } /* namespace detail */ \
313  BUILTIN_LAST_PTR(NUM_ARGS, NAME, builtin_enable_last_intptr_scalar_t, \
314  builtin_enable_last_intptr_non_scalar_t)
315 #endif
316 
// Builtins with a trailing int-pointer output argument. The count passed as
// NUM_ARGS includes the pointer itself. The generator macros are undefined
// afterwards (LAST_INT_PTR_DECLARE_SCALAR is only defined on host).
317 BUILTIN_LAST_INTPTR(TWO_ARGS, frexp)
318 BUILTIN_LAST_INTPTR(TWO_ARGS, lgamma_r)
319 BUILTIN_LAST_INTPTR(THREE_ARGS, remquo)
320 
321 #undef BUILTIN_LAST_INTPTR
322 #undef LAST_INT_PTR_DECLARE_SCALAR
323 
324 #ifndef __SYCL_DEVICE_ONLY__
325 namespace detail {
// Host implementation of fract: stores floor(x) through y and returns
// x - floor(x), capped with fmin at nextafter(1.0, 0.0), i.e. the value just
// below 1 in the direction of 0.
326 template <typename T0, typename T1> auto fract_impl(T0 &x, T1 &y) {
327  auto flr = floor(simplify_if_swizzle_t<T0>{x});
328  *y = flr;
329  return fmin(x - flr, nextafter(simplify_if_swizzle_t<T0>{1.0},
330  simplify_if_swizzle_t<T0>{0.0}));
331 }
332 } // namespace detail
333 #endif
// Public fract overloads. On device the macro also generates
// detail::fract_impl; on host it uses the one defined just above.
334 BUILTIN_LAST_PTR(TWO_ARGS, fract, builtin_enable_ptr_scalar_t,
335  builtin_enable_ptr_non_scalar_t)
336 
337 #ifndef __SYCL_DEVICE_ONLY__
338 namespace detail {
// Host scalar implementations, exported from the runtime library.
339 __SYCL_EXPORT float modf_impl(float, float *);
340 __SYCL_EXPORT double modf_impl(double, double *);
341 __SYCL_EXPORT half modf_impl(half, half *);
// Generic host overload: a multi_ptr is unwrapped with get_raw() and the call
// is retried; anything else is handled element by element through
// builtin_delegate_ptr_impl, which calls modf_impl for each element.
342 template <typename T0, typename T1> auto modf_impl(T0 &x, T1 &&y) {
343  if constexpr (is_multi_ptr_v<std::remove_reference_t<T1>>) {
344  // TODO: Spec needs to be clarified, multi_ptr shouldn't be possible on
345  // host.
346  return modf_impl(x, y.get_raw());
347  } else {
348  return builtin_delegate_ptr_impl(
349  [](auto x, auto y) { return modf_impl(x, y); }, y,
350  simplify_if_swizzle_t<T0>{x});
351  }
352 }
353 } // namespace detail
354 #endif
// Public modf overloads. On device the macro also generates
// detail::modf_impl; on host it uses the overloads declared just above.
355 BUILTIN_LAST_PTR(TWO_ARGS, modf, builtin_enable_ptr_scalar_t,
356  builtin_enable_ptr_non_scalar_t)
357 
358 #undef BUILTIN_LAST_PTR
357 
358 #undef BUILTIN_LAST_PTR
359 
// BUILTIN_MATH_LAST_INT(NAME): builtins taking a floating-point value and a
// trailing int.
// Device: scalar float/double/half overloads call __spirv_ocl_<NAME> (the half
// argument goes through convert_arg), and DEVICE_IMPL_TEMPLATE with
// builtin_enable_last_int_t provides the templated overload.
// Host: SCALAR_EXTERN_LAST_INT defines, for each entry of FP_TYPES, an inline
// overload that declares the external __<NAME>_impl inside its body and
// forwards to it; HOST_IMPL_TEMPLATE provides the templated overload.
360 #ifdef __SYCL_DEVICE_ONLY__
361 #define BUILTIN_MATH_LAST_INT(NAME) \
362  float NAME(float x, int y) { return __spirv_ocl_##NAME(x, y); } \
363  double NAME(double x, int y) { return __spirv_ocl_##NAME(x, y); } \
364  half NAME(half x, int y) { \
365  return __spirv_ocl_##NAME(detail::builtins::convert_arg(x), y); \
366  } \
367  DEVICE_IMPL_TEMPLATE(TWO_ARGS, NAME, builtin_enable_last_int_t, \
368  __spirv_ocl_##NAME)
369 #else
370 #define SCALAR_EXTERN_LAST_INT(NAME, TYPE) \
371  inline TYPE NAME(TYPE x, int y) { \
372  extern SYCL_BUILTIN_EXPORT TYPE __##NAME##_impl(TYPE, int); \
373  return __##NAME##_impl(x, y); \
374  }
375 #define BUILTIN_MATH_LAST_INT(NAME) \
376  FOR_EACH1(SCALAR_EXTERN_LAST_INT, NAME, FP_TYPES) \
377  HOST_IMPL_TEMPLATE(TWO_ARGS, NAME, builtin_enable_last_int_t, math, \
378  default_ret_type)
379 #endif
380 
// ldexp overload taking a non-scalar x (enabled through builtin_enable_math_t)
// and a scalar int exponent. Both arguments are converted -- x to
// simplify_if_swizzle_t<T>, y to the same shape with int elements -- and the
// call is forwarded to the two-argument ldexp.
// NOTE(review): this listing jumps from source line 380 to 384, so the
// BUILTIN_MATH_LAST_INT invocations that presumably generate the ldexp
// overload called here are not visible -- confirm against the original file.
384 template <typename T> detail::builtin_enable_math_t<T> ldexp(T x, int y) {
385  return ldexp(
386  detail::simplify_if_swizzle_t<T>{x},
387  detail::change_elements_t<int, detail::simplify_if_swizzle_t<T>>{y});
388 }
389 
390 #undef BUILTIN_MATH_LAST_INT
391 #undef SCALAR_EXTERN_LAST_INT
392 
393 namespace detail {
394 #ifdef __SYCL_DEVICE_ONLY__
// Device implementation of sincos: returns sin(x) and stores cos(x) through y.
// marray arguments are handled element by element via
// builtin_delegate_ptr_impl. Otherwise, with fast math enabled for T0, the
// native sin and cos builtins are called separately; without it the call goes
// to __spirv_ocl_sincos.
395 template <typename T0, typename T1> auto sincos_impl(T0 &x, T1 &&y) {
396  if constexpr (is_marray_v<T0>) {
397  return builtin_delegate_ptr_impl(
398  [](auto... xs) { return sincos_impl(xs...); }, y, x);
399  } else {
400  using detail::builtins::convert_arg;
401  if constexpr (use_fast_math_v<T0>) {
402  // This is a performance optimization to ensure that sincos isn't slower
403  // than a pair of sin/cos executed separately. Theoretically, calling
404  // non-native sincos might be faster than calling native::sin plus
405  // native::cos separately and we'd need some kind of cost model to make
406  // the right decision (and move this entirely to the JIT/AOT compilers).
407  // However, in practice, this simpler solution seems to work just fine and
408  // matches how sin/cos above are optimized for the fast math path.
409  *y = __spirv_ocl_native_cos(convert_arg(x));
410  return __spirv_ocl_native_sin(convert_arg(x));
411  } else {
412  return __spirv_ocl_sincos(convert_arg(x), convert_arg(y));
413  }
414  }
415 }
416 #else
// Host scalar implementations, exported from the runtime library.
417 __SYCL_EXPORT float sincos_impl(float, float *);
418 __SYCL_EXPORT double sincos_impl(double, double *);
419 __SYCL_EXPORT half sincos_impl(half, half *);
// Generic host overload: a multi_ptr is unwrapped with get_raw() and the call
// is retried; anything else is handled element by element through
// builtin_delegate_ptr_impl, which calls sincos_impl for each element.
420 template <typename T0, typename T1> auto sincos_impl(T0 &x, T1 &&y) {
421  if constexpr (is_multi_ptr_v<std::remove_reference_t<T1>>) {
422  // TODO: Spec needs to be clarified, multi_ptr shouldn't be possible on
423  // host.
424  return sincos_impl(x, y.get_raw());
425  } else {
426  return builtin_delegate_ptr_impl(
427  [](auto... xs) { return sincos_impl(xs...); }, y,
428  simplify_if_swizzle_t<T0>{x});
429  }
430 }
431 #endif
432 } // namespace detail
// Public sincos overloads. BUILTIN_LAST_PTR_COMMON is used directly (rather
// than BUILTIN_LAST_PTR) because detail::sincos_impl is hand-written above for
// both device and host. The helper macros are undefined afterwards.
433 BUILTIN_LAST_PTR_COMMON(TWO_ARGS, sincos, builtin_enable_ptr_scalar_t,
434  builtin_enable_ptr_non_scalar_t)
435 
436 #undef BUILTIN_LAST_PTR_COMMON
437 #undef LAST_PTR_SCALAR
438 
439 namespace detail {
// Return type trait for ilogb: T with its elements changed to int32_t when T
// is a vec/swizzle, and to int otherwise.
440 template <typename T>
441 struct ilogb_ret_traits
442  : change_elements<std::conditional_t<is_vec_or_swizzle_v<T>, int32_t, int>,
443  T> {};
// SFINAE enabler for the templated ilogb overload: yields the type given by
// ilogb_ret_traits<T> when builtin_enable_math_allow_scalar_v<T> holds.
444 template <typename T>
445 using builtin_enable_ilogb_t =
446  std::enable_if_t<builtin_enable_math_allow_scalar_v<T>,
447  typename ilogb_ret_traits<T>::type>;
448 } // namespace detail
449 
// ilogb overloads.
// Device: scalar float/double/half call __spirv_ocl_ilogb (half goes through
// convert_arg); DEVICE_IMPL_TEMPLATE provides the templated overload.
// Host: each scalar overload declares the external __ilogb_impl inside its
// body and forwards to it; HOST_IMPL_TEMPLATE provides the templated overload
// with ilogb_ret_traits as the return type trait.
450 #ifdef __SYCL_DEVICE_ONLY__
451 inline int ilogb(float x) { return __spirv_ocl_ilogb(x); }
452 inline int ilogb(double x) { return __spirv_ocl_ilogb(x); }
453 inline int ilogb(half x) {
454  return __spirv_ocl_ilogb(detail::builtins::convert_arg(x));
455 }
456 DEVICE_IMPL_TEMPLATE(ONE_ARG, ilogb, builtin_enable_ilogb_t, __spirv_ocl_ilogb)
457 #else
458 inline int ilogb(float x) {
459  extern SYCL_BUILTIN_EXPORT int __ilogb_impl(float);
460  return __ilogb_impl(x);
461 }
462 inline int ilogb(double x) {
463  extern SYCL_BUILTIN_EXPORT int __ilogb_impl(double);
464  return __ilogb_impl(x);
465 }
466 inline int ilogb(half x) {
467  extern SYCL_BUILTIN_EXPORT int __ilogb_impl(half);
468  return __ilogb_impl(x);
469 }
470 HOST_IMPL_TEMPLATE(ONE_ARG, ilogb, builtin_enable_ilogb_t, math,
471  ilogb_ret_traits)
472 #endif
473 
474 // nan implementation, as per
475 // https://github.com/KhronosGroup/SYCL-Docs/pull/519.
476 namespace detail {
// Result type of nan(T): T with its element type mapped
// uint32_t -> float, uint64_t -> double, uint16_t -> half.
477 template <typename T>
478 // clang-format off
479 using nan_elem_result_type = change_elements_t<
480  typename map_type<get_elem_type_t<T>,
481  uint32_t, /*->*/ float,
482  uint64_t, /*->*/ double,
483  uint16_t, /*->*/ half>::type,
484  T>;
485 // clang-format on
486 
// SFINAE enabler for the non-scalar nan overload: T must be a vec, swizzle or
// marray whose element type is uint32_t, uint64_t or uint16_t.
487 template <typename T>
488 using builtin_enable_nan_t = std::enable_if_t<
489  (((is_vec_or_swizzle_v<T> || is_marray_v<T>)) &&
490  check_type_in_v<get_elem_type_t<T>, uint32_t, uint64_t, uint16_t>),
491  nan_elem_result_type<T>>;
492 } // namespace detail
493 
494 #ifdef __SYCL_DEVICE_ONLY__
// Device nan overloads: each scalar overload forwards its payload, passed
// through convert_arg, to __spirv_ocl_nan; the unsigned payload width selects
// the floating-point return type (32 -> float, 64 -> double, 16 -> half).
// DEVICE_IMPL_TEMPLATE provides the templated overload.
495 inline float nan(uint32_t x) {
496  return __spirv_ocl_nan(detail::builtins::convert_arg(x));
497 }
498 inline double nan(uint64_t x) {
499  return __spirv_ocl_nan(detail::builtins::convert_arg(x));
500 }
501 inline half nan(uint16_t x) {
502  return __spirv_ocl_nan(detail::builtins::convert_arg(x));
503 }
504 DEVICE_IMPL_TEMPLATE(ONE_ARG, nan, builtin_enable_nan_t, __spirv_ocl_nan)
505 #else
// Host nan overloads for the 32- and 64-bit payload types. The payload
// argument is ignored; each overload returns the quiet NaN of its own return
// type (the double overload previously went through numeric_limits<float>).
inline float nan(uint32_t) { return std::numeric_limits<float>::quiet_NaN(); }
inline double nan(uint64_t) { return std::numeric_limits<double>::quiet_NaN(); }
// Host nan overload for the 16-bit payload type; the payload is ignored.
508 // NOTE: half_type.hpp provides partial specialization for std::numeric_limits.
509 inline half nan(uint16_t) { return std::numeric_limits<half>::quiet_NaN(); }
// Non-scalar host overload (vec/swizzle/marray of unsigned payloads): hands
// the scalar nan, wrapped in a lambda, to builtin_delegate_to_scalar together
// with x.
510 template <typename T> detail::builtin_enable_nan_t<T> nan(T x) {
511  return detail::builtin_delegate_to_scalar([](auto x) { return nan(x); }, x);
512 }
513 #endif
514 
515 } // namespace _V1
516 } // namespace sycl
#define BUILTIN_CREATE_ENABLER(NAME, RET_TYPE_TRAIT, ELEM_TYPE_CHECKER, SHAPE_CHECKER, EXTRA_CONDITIONS)
Definition: builtins.hpp:220
__ESIMD_API simd< T, N > sqrt(simd< T, N > src, Sat sat={})
Square root.
Definition: math.hpp:406
__ESIMD_API simd< T, N > rsqrt(simd< T, N > src, Sat sat={})
Square root reciprocal - calculates 1/sqrt(x).
Definition: math.hpp:432
__ESIMD_API simd< T, N > pow(simd< T, N > src0, simd< U, N > src1, Sat sat={})
Power - calculates src0 in power of src1.
Definition: math.hpp:502
__ESIMD_API simd< T, N > log2(simd< T, N > src, Sat sat={})
Logarithm base 2.
Definition: math.hpp:398
__ESIMD_API simd< T, N > exp2(simd< T, N > src, Sat sat={})
Exponent base 2.
Definition: math.hpp:402
#define HOST_IMPL_TEMPLATE(NUM_ARGS, NAME, ENABLER, FUNC_CLASS, RET_TYPE_TRAITS)
#define SYCL_BUILTIN_EXPORT
#define DEVICE_IMPL_TEMPLATE(NUM_ARGS, NAME, ENABLER,...)
#define BUILTIN_GENF(NUM_ARGS, NAME)
#define BUILTIN_MATH_LAST_INT(NAME, IMPL)
constexpr bool check_type_in_v
constexpr bool builtin_same_shape_v
Definition: builtins.hpp:72
auto builtin_delegate_to_scalar(FuncTy F, const Ts &...x)
Definition: builtins.hpp:156
typename get_elem_type< T >::type get_elem_type_t
typename change_elements< NewElemT, T >::type change_elements_t
constexpr bool is_multi_ptr_v
void loop(F &&f)
Definition: helpers.hpp:250
constexpr bool builtin_same_or_swizzle_v
Definition: builtins.hpp:79
constexpr bool is_swizzle_v
std::enable_if_t< std::is_same_v< Tp, float >, float > copysign(Tp x, Tp y)
Definition: math.hpp:77
__SYCL_ALWAYS_INLINE std::enable_if_t< sycl::detail::is_svgenfloatf_v< T >||sycl::detail::is_svgenfloath_v< T >, T > tanh(T x) __NOEXC
Definition: builtins.hpp:100
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > atanh(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > tan(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > asin(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > cosh(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > asinh(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > acos(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > acosh(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > log10(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > atan(const complex< _Tp > &__x)
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, complex< _Tp > > sinh(const complex< _Tp > &__x)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmin(T x, T y)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fabs(T x)
float ceil(float)
return(x >=T(0)) ? T(std sinpi
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > log(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > sin(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > cos(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
float floor(float)
float rint(float)
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > exp(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
sycl::detail::half_impl::half half
Definition: aliases.hpp:101
autodecltype(x) x
float trunc(float)
Definition: access.hpp:18
std::enable_if_t< std::is_floating_point_v< ValueT >||std::is_same_v< sycl::half, ValueT >, ValueT > cbrt(ValueT val)
cbrt function wrapper.
Definition: math.hpp:692