26 return {x[start], x[start + 1]};
30 #ifdef __SYCL_DEVICE_ONLY__
// Restricted form of the two predicates: the GENFLOAT test accepts only types
// matching detail::is_svgenfloatd or detail::is_svgenfloath, and the SGENFLOAT
// test accepts only exactly double or half.
// NOTE(review): the preprocessor condition selecting this pair is not visible
// in this excerpt - confirm which configuration it belongs to.
37 #define __FAST_MATH_GENFLOAT(T) \
38 (detail::is_svgenfloatd<T>::value || detail::is_svgenfloath<T>::value)
39 #define __FAST_MATH_SGENFLOAT(T) \
40 (std::is_same_v<T, double> || std::is_same_v<T, half>)
// Unrestricted form of the same two predicates: they defer directly to
// detail::is_svgenfloat and detail::is_sgenfloat respectively.
42 #define __FAST_MATH_GENFLOAT(T) (detail::is_svgenfloat<T>::value)
43 #define __FAST_MATH_SGENFLOAT(T) (detail::is_sgenfloat<T>::value)
53 #define __SYCL_MATH_FUNCTION_OVERLOAD_IMPL(NAME) \
55 for (size_t i = 0; i < N / 2; i++) { \
56 vec<T, 2> partial_res = \
57 __sycl_std::__invoke_##NAME<vec<T, 2>>(detail::to_vec2(x, i * 2)); \
58 std::memcpy(&res[i * 2], &partial_res, sizeof(vec<T, 2>)); \
61 res[N - 1] = __sycl_std::__invoke_##NAME<T>(x[N - 1]); \
65 #define __SYCL_MATH_FUNCTION_OVERLOAD(NAME) \
66 template <typename T, size_t N> \
67 inline __SYCL_ALWAYS_INLINE \
68 std::enable_if_t<detail::is_sgenfloat<T>::value, marray<T, N>> \
69 NAME(marray<T, N> x) __NOEXC { \
70 __SYCL_MATH_FUNCTION_OVERLOAD_IMPL(NAME) \
102 #undef __SYCL_MATH_FUNCTION_OVERLOAD
106 #define __SYCL_MATH_FUNCTION_OVERLOAD_FM(NAME) \
107 template <typename T, size_t N> \
108 inline __SYCL_ALWAYS_INLINE \
109 std::enable_if_t<__FAST_MATH_SGENFLOAT(T), marray<T, N>> \
110 NAME(marray<T, N> x) __NOEXC { \
111 __SYCL_MATH_FUNCTION_OVERLOAD_IMPL(NAME) \
126 #undef __SYCL_MATH_FUNCTION_OVERLOAD_FM
127 #undef __SYCL_MATH_FUNCTION_OVERLOAD_IMPL
129 #define __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(NAME) \
131 for (size_t i = 0; i < N / 2; i++) { \
132 auto partial_res = __sycl_std::__invoke_##NAME<vec<T, 2>>( \
133 detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \
134 std::memcpy(&res[i * 2], &partial_res, sizeof(vec<T, 2>)); \
137 res[N - 1] = __sycl_std::__invoke_##NAME<T>(x[N - 1], y[N - 1]); \
141 #define __SYCL_MATH_FUNCTION_2_OVERLOAD(NAME) \
142 template <typename T, size_t N> \
143 inline __SYCL_ALWAYS_INLINE \
144 std::enable_if_t<detail::is_sgenfloat<T>::value, marray<T, N>> \
145 NAME(marray<T, N> x, marray<T, N> y) __NOEXC { \
146 __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(NAME) \
163 #undef __SYCL_MATH_FUNCTION_2_OVERLOAD
165 template <
typename T,
size_t N>
171 #undef __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL
173 #define __SYCL_MATH_FUNCTION_3_OVERLOAD(NAME) \
174 template <typename T, size_t N> \
175 inline __SYCL_ALWAYS_INLINE \
176 std::enable_if_t<detail::is_sgenfloat<T>::value, marray<T, N>> \
177 NAME(marray<T, N> x, marray<T, N> y, marray<T, N> z) __NOEXC { \
179 for (size_t i = 0; i < N / 2; i++) { \
180 auto partial_res = __sycl_std::__invoke_##NAME<vec<T, 2>>( \
181 detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2), \
182 detail::to_vec2(z, i * 2)); \
183 std::memcpy(&res[i * 2], &partial_res, sizeof(vec<T, 2>)); \
187 __sycl_std::__invoke_##NAME<T>(x[N - 1], y[N - 1], z[N - 1]); \
195 #undef __SYCL_MATH_FUNCTION_3_OVERLOAD
// acos: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_acos<T> and returns that result as T.
198 template <
typename T>
199 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
acos(T x)
__NOEXC {
200 return __sycl_std::__invoke_acos<T>(x);
// acosh: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_acosh<T> and returns that result as T.
204 template <
typename T>
205 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> acosh(T x)
__NOEXC {
206 return __sycl_std::__invoke_acosh<T>(x);
// acospi: enabled only when detail::is_svgenfloat<T>::value is true. Forwards
// x to __sycl_std::__invoke_acospi<T> and returns that result as T.
210 template <
typename T>
211 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> acospi(T x)
__NOEXC {
212 return __sycl_std::__invoke_acospi<T>(x);
// asin: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_asin<T> and returns that result as T.
216 template <
typename T>
217 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
asin(T x)
__NOEXC {
218 return __sycl_std::__invoke_asin<T>(x);
// asinh: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_asinh<T> and returns that result as T.
222 template <
typename T>
223 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> asinh(T x)
__NOEXC {
224 return __sycl_std::__invoke_asinh<T>(x);
// asinpi: enabled only when detail::is_svgenfloat<T>::value is true. Forwards
// x to __sycl_std::__invoke_asinpi<T> and returns that result as T.
228 template <
typename T>
229 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> asinpi(T x)
__NOEXC {
230 return __sycl_std::__invoke_asinpi<T>(x);
234 template <
typename T>
235 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
237 return __sycl_std::__invoke_atan<T>(y_over_x);
241 template <
typename T>
242 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
atan2(T y,
244 return __sycl_std::__invoke_atan2<T>(y, x);
// atanh: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_atanh<T> and returns that result as T.
248 template <
typename T>
249 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> atanh(T x)
__NOEXC {
250 return __sycl_std::__invoke_atanh<T>(x);
// atanpi: enabled only when detail::is_svgenfloat<T>::value is true. Forwards
// x to __sycl_std::__invoke_atanpi<T> and returns that result as T.
254 template <
typename T>
255 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> atanpi(T x)
__NOEXC {
256 return __sycl_std::__invoke_atanpi<T>(x);
260 template <
typename T>
261 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> atan2pi(T y,
263 return __sycl_std::__invoke_atan2pi<T>(y, x);
// cbrt: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_cbrt<T> and returns that result as T.
267 template <
typename T>
268 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> cbrt(T x)
__NOEXC {
269 return __sycl_std::__invoke_cbrt<T>(x);
// ceil: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_ceil<T> and returns that result as T.
273 template <
typename T>
274 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
ceil(T x)
__NOEXC {
275 return __sycl_std::__invoke_ceil<T>(x);
279 template <
typename T>
280 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
copysign(T x,
282 return __sycl_std::__invoke_copysign<T>(x, y);
286 template <
typename T>
288 return __sycl_std::__invoke_cos<T>(x);
// cosh: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_cosh<T> and returns that result as T.
292 template <
typename T>
293 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> cosh(T x)
__NOEXC {
294 return __sycl_std::__invoke_cosh<T>(x);
// cospi: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_cospi<T> and returns that result as T.
298 template <
typename T>
299 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> cospi(T x)
__NOEXC {
300 return __sycl_std::__invoke_cospi<T>(x);
// erfc: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_erfc<T> and returns that result as T.
304 template <
typename T>
305 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> erfc(T x)
__NOEXC {
306 return __sycl_std::__invoke_erfc<T>(x);
// erf: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_erf<T> and returns that result as T.
310 template <
typename T>
311 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> erf(T x)
__NOEXC {
312 return __sycl_std::__invoke_erf<T>(x);
316 template <
typename T>
318 return __sycl_std::__invoke_exp<T>(x);
322 template <
typename T>
324 return __sycl_std::__invoke_exp2<T>(x);
328 template <
typename T>
330 return __sycl_std::__invoke_exp10<T>(x);
// expm1: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_expm1<T> and returns that result as T.
334 template <
typename T>
335 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> expm1(T x)
__NOEXC {
336 return __sycl_std::__invoke_expm1<T>(x);
// fabs: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_fabs<T> and returns that result as T.
340 template <
typename T>
341 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
fabs(T x)
__NOEXC {
342 return __sycl_std::__invoke_fabs<T>(x);
// fdim: enabled only when detail::is_svgenfloat<T>::value is true. Both
// operands share the type T; they are forwarded in order (x, y) to
// __sycl_std::__invoke_fdim<T>, whose result is returned.
346 template <
typename T>
347 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> fdim(T x, T y)
__NOEXC {
348 return __sycl_std::__invoke_fdim<T>(x, y);
// floor: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_floor<T> and returns that result as T.
352 template <
typename T>
353 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
floor(T x)
__NOEXC {
354 return __sycl_std::__invoke_floor<T>(x);
358 template <
typename T>
359 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
fma(T a, T b,
361 return __sycl_std::__invoke_fma<T>(a, b, c);
// fmax (same-type operands): enabled only when detail::is_svgenfloat<T>::value
// is true. Forwards (x, y) to __sycl_std::__invoke_fmax<T> and returns the
// result.
365 template <
typename T>
366 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
fmax(T x, T y)
__NOEXC {
367 return __sycl_std::__invoke_fmax<T>(x, y);
371 template <
typename T>
372 detail::enable_if_t<detail::is_vgenfloat<T>::value, T>
374 return __sycl_std::__invoke_fmax<T>(x, T(y));
// fmin (same-type operands): enabled only when detail::is_svgenfloat<T>::value
// is true. Forwards (x, y) to __sycl_std::__invoke_fmin<T> and returns the
// result.
378 template <
typename T>
379 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
fmin(T x, T y)
__NOEXC {
380 return __sycl_std::__invoke_fmin<T>(x, y);
384 template <
typename T>
385 detail::enable_if_t<detail::is_vgenfloat<T>::value, T>
387 return __sycl_std::__invoke_fmin<T>(x, T(y));
// fmod: enabled only when detail::is_svgenfloat<T>::value is true. Forwards
// (x, y) to __sycl_std::__invoke_fmod<T> and returns the result.
391 template <
typename T>
392 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
fmod(T x, T y)
__NOEXC {
393 return __sycl_std::__invoke_fmod<T>(x, y);
397 template <
typename T,
typename T2>
399 detail::is_svgenfloat<T>::value && detail::is_genfloatptr<T2>::value, T>
401 detail::check_vector_size<T, T2>();
402 return __sycl_std::__invoke_fract<T>(x, iptr);
406 template <
typename T,
typename T2>
408 detail::is_svgenfloat<T>::value && detail::is_genintptr<T2>::value, T>
410 detail::check_vector_size<T, T2>();
411 return __sycl_std::__invoke_frexp<T>(x,
exp);
415 template <
typename T>
416 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> hypot(T x,
418 return __sycl_std::__invoke_hypot<T>(x, y);
// ilogb: constrained through a defaulted template parameter, so substitution
// fails unless detail::is_svgenfloat<T>::value is true. The return type is
// detail::change_base_type_t<T, int> rather than T, and the same type is used
// to instantiate __sycl_std::__invoke_ilogb, which receives x.
422 template <
typename T,
423 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
424 detail::change_base_type_t<T, int> ilogb(T x)
__NOEXC {
425 return __sycl_std::__invoke_ilogb<detail::change_base_type_t<T, int>>(x);
431 template <
typename T>
432 detail::enable_if_t<detail::is_sgenfloat<T>::value, T> ldexp(T x,
434 return __sycl_std::__invoke_ldexp<T>(x, k);
438 template <
typename T>
439 detail::enable_if_t<detail::is_vgenfloat<T>::value, T> ldexp(T x,
441 return __sycl_std::__invoke_ldexp<T>(x, vec<
int, T::size()>(k));
445 template <
typename T,
typename T2>
447 detail::is_vgenfloat<T>::value && detail::is_intn<T2>::value, T>
449 detail::check_vector_size<T, T2>();
450 return __sycl_std::__invoke_ldexp<T>(x, k);
// lgamma: enabled only when detail::is_svgenfloat<T>::value is true. Forwards
// x to __sycl_std::__invoke_lgamma<T> and returns that result as T.
454 template <
typename T>
455 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> lgamma(T x)
__NOEXC {
456 return __sycl_std::__invoke_lgamma<T>(x);
460 template <
typename T,
typename T2>
462 detail::is_svgenfloat<T>::value && detail::is_genintptr<T2>::value, T>
463 lgamma_r(T x, T2 signp)
__NOEXC {
464 detail::check_vector_size<T, T2>();
465 return __sycl_std::__invoke_lgamma_r<T>(x, signp);
469 template <
typename T>
471 return __sycl_std::__invoke_log<T>(x);
475 template <
typename T>
477 return __sycl_std::__invoke_log2<T>(x);
481 template <
typename T>
483 return __sycl_std::__invoke_log10<T>(x);
// log1p: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_log1p<T> and returns that result as T.
487 template <
typename T>
488 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> log1p(T x)
__NOEXC {
489 return __sycl_std::__invoke_log1p<T>(x);
// logb: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_logb<T> and returns that result as T.
493 template <
typename T>
494 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> logb(T x)
__NOEXC {
495 return __sycl_std::__invoke_logb<T>(x);
499 template <
typename T>
500 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> mad(T a, T b,
502 return __sycl_std::__invoke_mad<T>(a, b, c);
506 template <
typename T>
507 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> maxmag(T x,
509 return __sycl_std::__invoke_maxmag<T>(x, y);
513 template <
typename T>
514 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> minmag(T x,
516 return __sycl_std::__invoke_minmag<T>(x, y);
520 template <
typename T,
typename T2>
522 detail::is_svgenfloat<T>::value && detail::is_genfloatptr<T2>::value, T>
524 detail::check_vector_size<T, T2>();
525 return __sycl_std::__invoke_modf<T>(x, iptr);
// nan: constrained through a defaulted template parameter, so substitution
// fails unless detail::is_nan_type<T>::value is true. nancode is first
// converted with detail::convert_data_type<T, detail::nan_argument_base_t<T>>,
// then handed to __sycl_std::__invoke_nan instantiated for the return type
// detail::nan_return_t<T>.
528 template <
typename T,
529 typename = detail::enable_if_t<detail::is_nan_type<T>::value, T>>
530 detail::nan_return_t<T> nan(T nancode)
__NOEXC {
531 return __sycl_std::__invoke_nan<detail::nan_return_t<T>>(
532 detail::convert_data_type<T, detail::nan_argument_base_t<T>>()(nancode));
536 template <
typename T>
537 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> nextafter(T x,
539 return __sycl_std::__invoke_nextafter<T>(x, y);
// pow: enabled only when detail::is_svgenfloat<T>::value is true. Forwards
// (x, y) to __sycl_std::__invoke_pow<T> and returns the result.
543 template <
typename T>
544 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
pow(T x, T y)
__NOEXC {
545 return __sycl_std::__invoke_pow<T>(x, y);
549 template <
typename T,
typename T2>
551 detail::is_svgenfloat<T>::value && detail::is_genint<T2>::value, T>
553 detail::check_vector_size<T, T2>();
554 return __sycl_std::__invoke_pown<T>(x, y);
558 template <
typename T>
560 return __sycl_std::__invoke_powr<T>(x, y);
564 template <
typename T>
565 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> remainder(T x,
567 return __sycl_std::__invoke_remainder<T>(x, y);
571 template <
typename T,
typename T2>
573 detail::is_svgenfloat<T>::value && detail::is_genintptr<T2>::value, T>
574 remquo(T x, T y, T2 quo)
__NOEXC {
575 detail::check_vector_size<T, T2>();
576 return __sycl_std::__invoke_remquo<T>(x, y, quo);
// rint: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_rint<T> and returns that result as T.
580 template <
typename T>
581 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
rint(T x)
__NOEXC {
582 return __sycl_std::__invoke_rint<T>(x);
586 template <
typename T,
typename T2>
588 detail::is_svgenfloat<T>::value && detail::is_genint<T2>::value, T>
590 detail::check_vector_size<T, T2>();
591 return __sycl_std::__invoke_rootn<T>(x, y);
// round: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_round<T> and returns that result as T.
595 template <
typename T>
596 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> round(T x)
__NOEXC {
597 return __sycl_std::__invoke_round<T>(x);
601 template <
typename T>
603 return __sycl_std::__invoke_rsqrt<T>(x);
607 template <
typename T>
609 return __sycl_std::__invoke_sin<T>(x);
613 template <
typename T,
typename T2>
615 detail::is_svgenfloat<T>::value && detail::is_genfloatptr<T2>::value, T>
617 detail::check_vector_size<T, T2>();
618 return __sycl_std::__invoke_sincos<T>(x, cosval);
// sinh: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_sinh<T> and returns that result as T.
622 template <
typename T>
623 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> sinh(T x)
__NOEXC {
624 return __sycl_std::__invoke_sinh<T>(x);
// sinpi: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_sinpi<T> and returns that result as T.
628 template <
typename T>
629 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> sinpi(T x)
__NOEXC {
630 return __sycl_std::__invoke_sinpi<T>(x);
634 template <
typename T>
636 return __sycl_std::__invoke_sqrt<T>(x);
640 template <
typename T>
642 return __sycl_std::__invoke_tan<T>(x);
// tanh: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_tanh<T> and returns that result as T.
646 template <
typename T>
647 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
tanh(T x)
__NOEXC {
648 return __sycl_std::__invoke_tanh<T>(x);
// tanpi: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_tanpi<T> and returns that result as T.
652 template <
typename T>
653 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> tanpi(T x)
__NOEXC {
654 return __sycl_std::__invoke_tanpi<T>(x);
// tgamma: enabled only when detail::is_svgenfloat<T>::value is true. Forwards
// x to __sycl_std::__invoke_tgamma<T> and returns that result as T.
658 template <
typename T>
659 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> tgamma(T x)
__NOEXC {
660 return __sycl_std::__invoke_tgamma<T>(x);
// trunc: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_trunc<T> and returns that result as T.
664 template <
typename T>
665 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
trunc(T x)
__NOEXC {
666 return __sycl_std::__invoke_trunc<T>(x);
671 template <
typename T>
672 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
674 return __sycl_std::__invoke_fclamp<T>(x, minval, maxval);
// clamp (vector value, scalar bounds): enabled only when
// detail::is_vgenfloat<T>::value is true. minval and maxval arrive as
// T::element_type and are each converted with T(...) before the call, so
// __sycl_std::__invoke_fclamp<T> receives three arguments of type T.
680 template <
typename T>
681 detail::enable_if_t<detail::is_vgenfloat<T>::value, T>
682 clamp(T x,
typename T::element_type minval,
683 typename T::element_type maxval)
__NOEXC {
684 return __sycl_std::__invoke_fclamp<T>(x, T(minval), T(maxval));
688 template <
typename T>
689 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
691 return __sycl_std::__invoke_degrees<T>(radians);
// abs (floating-point overload): enabled only when
// detail::is_svgenfloat<T>::value is true. There is no separate entry point
// for it here; the call goes to __sycl_std::__invoke_fabs<T>.
695 template <
typename T>
696 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
abs(T x)
__NOEXC {
697 return __sycl_std::__invoke_fabs<T>(x);
// max (floating-point, same-type operands): enabled only when
// detail::is_svgenfloat<T>::value is true. Forwards (x, y) to
// __sycl_std::__invoke_fmax_common<T>.
// NOTE(review): the name is spelled "(max)" - presumably so that a
// function-like max macro cannot expand at this declaration; confirm.
701 template <
typename T>
702 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>(
max)(T x, T y)
__NOEXC {
703 return __sycl_std::__invoke_fmax_common<T>(x, y);
// max (floating-point vector with scalar second operand): enabled only when
// detail::is_vgenfloat<T>::value is true. y arrives as T::element_type and is
// converted with T(y) before being passed, together with x, to
// __sycl_std::__invoke_fmax_common<T>.
709 template <
typename T>
710 detail::enable_if_t<detail::is_vgenfloat<T>::value, T>(
max)(
711 T x,
typename T::element_type y)
__NOEXC {
712 return __sycl_std::__invoke_fmax_common<T>(x, T(y));
// min (floating-point, same-type operands): enabled only when
// detail::is_svgenfloat<T>::value is true. Forwards (x, y) to
// __sycl_std::__invoke_fmin_common<T>.
// NOTE(review): the name is spelled "(min)" - presumably so that a
// function-like min macro cannot expand at this declaration; confirm.
716 template <
typename T>
717 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>(
min)(T x, T y)
__NOEXC {
718 return __sycl_std::__invoke_fmin_common<T>(x, y);
// min (floating-point vector with scalar second operand): enabled only when
// detail::is_vgenfloat<T>::value is true. y arrives as T::element_type and is
// converted with T(y) before being passed, together with x, to
// __sycl_std::__invoke_fmin_common<T>.
724 template <
typename T>
725 detail::enable_if_t<detail::is_vgenfloat<T>::value, T>(
min)(
726 T x,
typename T::element_type y)
__NOEXC {
727 return __sycl_std::__invoke_fmin_common<T>(x, T(y));
731 template <
typename T>
732 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> mix(T x, T y,
734 return __sycl_std::__invoke_mix<T>(x, y, a);
// mix (vector operands with scalar third argument): enabled only when
// detail::is_vgenfloat<T>::value is true. a arrives as T::element_type and is
// converted with T(a); the call is __sycl_std::__invoke_mix<T>(x, y, T(a)).
740 template <
typename T>
741 detail::enable_if_t<detail::is_vgenfloat<T>::value, T>
742 mix(T x, T y,
typename T::element_type a)
__NOEXC {
743 return __sycl_std::__invoke_mix<T>(x, y, T(a));
747 template <
typename T>
748 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
750 return __sycl_std::__invoke_radians<T>(degrees);
754 template <
typename T>
755 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> step(T edge,
757 return __sycl_std::__invoke_step<T>(edge, x);
// step (scalar edge, vector x): enabled only when
// detail::is_vgenfloat<T>::value is true. edge arrives as T::element_type and
// is converted with T(edge); the call is
// __sycl_std::__invoke_step<T>(T(edge), x).
763 template <
typename T>
764 detail::enable_if_t<detail::is_vgenfloat<T>::value, T>
765 step(
typename T::element_type edge, T x)
__NOEXC {
766 return __sycl_std::__invoke_step<T>(T(edge), x);
// smoothstep (all operands of type T): enabled only when
// detail::is_svgenfloat<T>::value is true. Forwards (edge0, edge1, x) in that
// order to __sycl_std::__invoke_smoothstep<T>.
770 template <
typename T>
771 detail::enable_if_t<detail::is_svgenfloat<T>::value, T>
772 smoothstep(T edge0, T edge1, T x)
__NOEXC {
773 return __sycl_std::__invoke_smoothstep<T>(edge0, edge1, x);
779 template <
typename T>
780 detail::enable_if_t<detail::is_vgenfloat<T>::value, T>
781 smoothstep(
typename T::element_type edge0,
typename T::element_type edge1,
783 return __sycl_std::__invoke_smoothstep<T>(T(edge0), T(edge1), x);
// sign: enabled only when detail::is_svgenfloat<T>::value is true. Forwards x
// to __sycl_std::__invoke_sign<T> and returns that result as T.
787 template <
typename T>
788 detail::enable_if_t<detail::is_svgenfloat<T>::value, T> sign(T x)
__NOEXC {
789 return __sycl_std::__invoke_sign<T>(x);
// abs (unsigned-integer overload): enabled only when
// detail::is_ugeninteger<T>::value is true. Forwards x to
// __sycl_std::__invoke_u_abs<T>; the return type stays T.
794 template <
typename T>
795 detail::enable_if_t<detail::is_ugeninteger<T>::value, T>
abs(T x)
__NOEXC {
796 return __sycl_std::__invoke_u_abs<T>(x);
800 template <
typename T>
801 detail::enable_if_t<detail::is_igeninteger<T>::value,
802 detail::make_unsigned_t<T>>
804 return __sycl_std::__invoke_s_abs<detail::make_unsigned_t<T>>(x);
808 template <
typename T>
809 detail::enable_if_t<detail::is_ugeninteger<T>::value, T> abs_diff(T x,
811 return __sycl_std::__invoke_u_abs_diff<T>(x, y);
815 template <
typename T>
816 detail::enable_if_t<detail::is_igeninteger<T>::value,
817 detail::make_unsigned_t<T>>
819 return __sycl_std::__invoke_s_abs_diff<detail::make_unsigned_t<T>>(x, y);
823 template <
typename T>
824 detail::enable_if_t<detail::is_igeninteger<T>::value, T> add_sat(T x,
826 return __sycl_std::__invoke_s_add_sat<T>(x, y);
830 template <
typename T>
831 detail::enable_if_t<detail::is_ugeninteger<T>::value, T> add_sat(T x,
833 return __sycl_std::__invoke_u_add_sat<T>(x, y);
837 template <
typename T>
838 detail::enable_if_t<detail::is_igeninteger<T>::value, T>
hadd(T x,
840 return __sycl_std::__invoke_s_hadd<T>(x, y);
844 template <
typename T>
845 detail::enable_if_t<detail::is_ugeninteger<T>::value, T>
hadd(T x,
847 return __sycl_std::__invoke_u_hadd<T>(x, y);
851 template <
typename T>
852 detail::enable_if_t<detail::is_igeninteger<T>::value, T> rhadd(T x,
854 return __sycl_std::__invoke_s_rhadd<T>(x, y);
858 template <
typename T>
859 detail::enable_if_t<detail::is_ugeninteger<T>::value, T> rhadd(T x,
861 return __sycl_std::__invoke_u_rhadd<T>(x, y);
865 template <
typename T>
866 detail::enable_if_t<detail::is_igeninteger<T>::value, T>
868 return __sycl_std::__invoke_s_clamp<T>(x, minval, maxval);
872 template <
typename T>
873 detail::enable_if_t<detail::is_ugeninteger<T>::value, T>
875 return __sycl_std::__invoke_u_clamp<T>(x, minval, maxval);
// clamp (vector value, scalar bounds; signed path): enabled only when
// detail::is_vigeninteger<T>::value is true. minval and maxval arrive as
// T::element_type and are converted with T(...) before calling
// __sycl_std::__invoke_s_clamp<T>.
879 template <
typename T>
880 detail::enable_if_t<detail::is_vigeninteger<T>::value, T>
881 clamp(T x,
typename T::element_type minval,
882 typename T::element_type maxval)
__NOEXC {
883 return __sycl_std::__invoke_s_clamp<T>(x, T(minval), T(maxval));
// clamp (vector value, scalar bounds; unsigned path): enabled only when
// detail::is_vugeninteger<T>::value is true. minval and maxval arrive as
// T::element_type and are converted with T(...) before calling
// __sycl_std::__invoke_u_clamp<T>.
887 template <
typename T>
888 detail::enable_if_t<detail::is_vugeninteger<T>::value, T>
889 clamp(T x,
typename T::element_type minval,
890 typename T::element_type maxval)
__NOEXC {
891 return __sycl_std::__invoke_u_clamp<T>(x, T(minval), T(maxval));
// clz: enabled only when detail::is_geninteger<T>::value is true. Forwards x
// to __sycl_std::__invoke_clz<T> and returns that result as T.
895 template <
typename T>
896 detail::enable_if_t<detail::is_geninteger<T>::value, T> clz(T x)
__NOEXC {
897 return __sycl_std::__invoke_clz<T>(x);
// ctz: enabled only when detail::is_geninteger<T>::value is true. Forwards x
// to __sycl_std::__invoke_ctz<T> and returns that result as T.
901 template <
typename T>
902 detail::enable_if_t<detail::is_geninteger<T>::value, T> ctz(T x)
__NOEXC {
903 return __sycl_std::__invoke_ctz<T>(x);
907 namespace ext::intel {
908 template <
typename T>
910 "'sycl::ext::intel::ctz' is deprecated, use 'sycl::ctz' instead")
918 using namespace ext::intel;
922 template <
typename T>
923 detail::enable_if_t<detail::is_igeninteger<T>::value, T> mad_hi(T x, T y,
925 return __sycl_std::__invoke_s_mad_hi<T>(x, y, z);
929 template <
typename T>
930 detail::enable_if_t<detail::is_ugeninteger<T>::value, T> mad_hi(T x, T y,
932 return __sycl_std::__invoke_u_mad_hi<T>(x, y, z);
936 template <
typename T>
937 detail::enable_if_t<detail::is_igeninteger<T>::value, T> mad_sat(T a, T b,
939 return __sycl_std::__invoke_s_mad_sat<T>(a, b, c);
943 template <
typename T>
944 detail::enable_if_t<detail::is_ugeninteger<T>::value, T> mad_sat(T a, T b,
946 return __sycl_std::__invoke_u_mad_sat<T>(a, b, c);
950 template <
typename T>
951 detail::enable_if_t<detail::is_igeninteger<T>::value, T>(
max)(T x,
953 return __sycl_std::__invoke_s_max<T>(x, y);
957 template <
typename T>
958 detail::enable_if_t<detail::is_ugeninteger<T>::value, T>(
max)(T x,
960 return __sycl_std::__invoke_u_max<T>(x, y);
// max (integer vector with scalar second operand; signed path): enabled only
// when detail::is_vigeninteger<T>::value is true. y arrives as
// T::element_type and is converted with T(y) before calling
// __sycl_std::__invoke_s_max<T>.
964 template <
typename T>
965 detail::enable_if_t<detail::is_vigeninteger<T>::value, T>(
max)(
966 T x,
typename T::element_type y)
__NOEXC {
967 return __sycl_std::__invoke_s_max<T>(x, T(y));
// max (integer vector with scalar second operand; unsigned path): enabled
// only when detail::is_vugeninteger<T>::value is true. y arrives as
// T::element_type and is converted with T(y) before calling
// __sycl_std::__invoke_u_max<T>.
971 template <
typename T>
972 detail::enable_if_t<detail::is_vugeninteger<T>::value, T>(
max)(
973 T x,
typename T::element_type y)
__NOEXC {
974 return __sycl_std::__invoke_u_max<T>(x, T(y));
978 template <
typename T>
979 detail::enable_if_t<detail::is_igeninteger<T>::value, T>(
min)(T x,
981 return __sycl_std::__invoke_s_min<T>(x, y);
985 template <
typename T>
986 detail::enable_if_t<detail::is_ugeninteger<T>::value, T>(
min)(T x,
988 return __sycl_std::__invoke_u_min<T>(x, y);
// min (integer vector with scalar second operand; signed path): enabled only
// when detail::is_vigeninteger<T>::value is true. y arrives as
// T::element_type and is converted with T(y) before calling
// __sycl_std::__invoke_s_min<T>.
992 template <
typename T>
993 detail::enable_if_t<detail::is_vigeninteger<T>::value, T>(
min)(
994 T x,
typename T::element_type y)
__NOEXC {
995 return __sycl_std::__invoke_s_min<T>(x, T(y));
// min (integer vector with scalar second operand; unsigned path): enabled
// only when detail::is_vugeninteger<T>::value is true. y arrives as
// T::element_type and is converted with T(y) before calling
// __sycl_std::__invoke_u_min<T>.
999 template <
typename T>
1000 detail::enable_if_t<detail::is_vugeninteger<T>::value, T>(
min)(
1001 T x,
typename T::element_type y)
__NOEXC {
1002 return __sycl_std::__invoke_u_min<T>(x, T(y));
1006 template <
typename T>
1007 detail::enable_if_t<detail::is_igeninteger<T>::value, T> mul_hi(T x,
1009 return __sycl_std::__invoke_s_mul_hi<T>(x, y);
1013 template <
typename T>
1014 detail::enable_if_t<detail::is_ugeninteger<T>::value, T> mul_hi(T x,
1016 return __sycl_std::__invoke_u_mul_hi<T>(x, y);
1020 template <
typename T>
1021 detail::enable_if_t<detail::is_geninteger<T>::value, T> rotate(T v,
1023 return __sycl_std::__invoke_rotate<T>(v, i);
1027 template <
typename T>
1028 detail::enable_if_t<detail::is_igeninteger<T>::value, T> sub_sat(T x,
1030 return __sycl_std::__invoke_s_sub_sat<T>(x, y);
1034 template <
typename T>
1035 detail::enable_if_t<detail::is_ugeninteger<T>::value, T> sub_sat(T x,
1037 return __sycl_std::__invoke_u_sub_sat<T>(x, y);
// upsample (both halves of type T): enabled only when
// detail::is_ugeninteger8bit<T>::value is true. The result type is
// detail::make_larger_t<T>, which is also the type used to instantiate
// __sycl_std::__invoke_u_upsample; hi and lo are passed in that order.
1041 template <
typename T>
1042 detail::enable_if_t<detail::is_ugeninteger8bit<T>::value,
1043 detail::make_larger_t<T>>
1044 upsample(T hi, T lo)
__NOEXC {
1045 return __sycl_std::__invoke_u_upsample<detail::make_larger_t<T>>(hi, lo);
// upsample (hi of type T, lo of type T2): enabled only when
// detail::is_igeninteger8bit<T>::value and
// detail::is_ugeninteger8bit<T2>::value are both true. Calls
// detail::check_vector_size<T, T2>() before forwarding (hi, lo) to
// __sycl_std::__invoke_s_upsample instantiated for detail::make_larger_t<T>.
1049 template <
typename T,
typename T2>
1050 detail::enable_if_t<detail::is_igeninteger8bit<T>::value &&
1051 detail::is_ugeninteger8bit<T2>::value,
1052 detail::make_larger_t<T>>
1053 upsample(T hi, T2 lo)
__NOEXC {
1054 detail::check_vector_size<T, T2>();
1055 return __sycl_std::__invoke_s_upsample<detail::make_larger_t<T>>(hi, lo);
// upsample (both halves of type T): enabled only when
// detail::is_ugeninteger16bit<T>::value is true. The result type is
// detail::make_larger_t<T>, which is also the type used to instantiate
// __sycl_std::__invoke_u_upsample; hi and lo are passed in that order.
1059 template <
typename T>
1060 detail::enable_if_t<detail::is_ugeninteger16bit<T>::value,
1061 detail::make_larger_t<T>>
1062 upsample(T hi, T lo)
__NOEXC {
1063 return __sycl_std::__invoke_u_upsample<detail::make_larger_t<T>>(hi, lo);
// upsample (hi of type T, lo of type T2): enabled only when
// detail::is_igeninteger16bit<T>::value and
// detail::is_ugeninteger16bit<T2>::value are both true. Calls
// detail::check_vector_size<T, T2>() before forwarding (hi, lo) to
// __sycl_std::__invoke_s_upsample instantiated for detail::make_larger_t<T>.
1067 template <
typename T,
typename T2>
1068 detail::enable_if_t<detail::is_igeninteger16bit<T>::value &&
1069 detail::is_ugeninteger16bit<T2>::value,
1070 detail::make_larger_t<T>>
1071 upsample(T hi, T2 lo)
__NOEXC {
1072 detail::check_vector_size<T, T2>();
1073 return __sycl_std::__invoke_s_upsample<detail::make_larger_t<T>>(hi, lo);
// upsample (both halves of type T): enabled only when
// detail::is_ugeninteger32bit<T>::value is true. The result type is
// detail::make_larger_t<T>, which is also the type used to instantiate
// __sycl_std::__invoke_u_upsample; hi and lo are passed in that order.
1077 template <
typename T>
1078 detail::enable_if_t<detail::is_ugeninteger32bit<T>::value,
1079 detail::make_larger_t<T>>
1080 upsample(T hi, T lo)
__NOEXC {
1081 return __sycl_std::__invoke_u_upsample<detail::make_larger_t<T>>(hi, lo);
// upsample (hi of type T, lo of type T2): enabled only when
// detail::is_igeninteger32bit<T>::value and
// detail::is_ugeninteger32bit<T2>::value are both true. Calls
// detail::check_vector_size<T, T2>() before forwarding (hi, lo) to
// __sycl_std::__invoke_s_upsample instantiated for detail::make_larger_t<T>.
1085 template <
typename T,
typename T2>
1086 detail::enable_if_t<detail::is_igeninteger32bit<T>::value &&
1087 detail::is_ugeninteger32bit<T2>::value,
1088 detail::make_larger_t<T>>
1089 upsample(T hi, T2 lo)
__NOEXC {
1090 detail::check_vector_size<T, T2>();
1091 return __sycl_std::__invoke_s_upsample<detail::make_larger_t<T>>(hi, lo);
// popcount: enabled only when detail::is_geninteger<T>::value is true.
// Forwards x to __sycl_std::__invoke_popcount<T> and returns the result as T.
1095 template <
typename T>
1096 detail::enable_if_t<detail::is_geninteger<T>::value, T>
popcount(T x)
__NOEXC {
1097 return __sycl_std::__invoke_popcount<T>(x);
// mad24 (signed path): enabled only when
// detail::is_igeninteger32bit<T>::value is true. Forwards (x, y, z) to
// __sycl_std::__invoke_s_mad24<T> and returns the result.
1102 template <
typename T>
1103 detail::enable_if_t<detail::is_igeninteger32bit<T>::value, T>
1104 mad24(T x, T y, T z)
__NOEXC {
1105 return __sycl_std::__invoke_s_mad24<T>(x, y, z);
// mad24 (unsigned path): enabled only when
// detail::is_ugeninteger32bit<T>::value is true. Forwards (x, y, z) to
// __sycl_std::__invoke_u_mad24<T> and returns the result.
1110 template <
typename T>
1111 detail::enable_if_t<detail::is_ugeninteger32bit<T>::value, T>
1112 mad24(T x, T y, T z)
__NOEXC {
1113 return __sycl_std::__invoke_u_mad24<T>(x, y, z);
1117 template <
typename T>
1118 detail::enable_if_t<detail::is_igeninteger32bit<T>::value, T>
1120 return __sycl_std::__invoke_s_mul24<T>(x, y);
1124 template <
typename T>
1125 detail::enable_if_t<detail::is_ugeninteger32bit<T>::value, T>
1127 return __sycl_std::__invoke_u_mul24<T>(x, y);
1137 template <
typename T>
1138 detail::enable_if_t<detail::is_gencross<T>::value, T> cross(T p0,
1140 return __sycl_std::__invoke_cross<T>(p0, p1);
1146 template <
typename T>
1147 detail::enable_if_t<detail::is_sgenfloat<T>::value, T> dot(T p0, T p1)
__NOEXC {
1152 template <
typename T>
1153 detail::enable_if_t<detail::is_vgengeofloat<T>::value,
float>
1155 return __sycl_std::__invoke_Dot<float>(p0, p1);
1159 template <
typename T>
1160 detail::enable_if_t<detail::is_vgengeodouble<T>::value,
double>
1162 return __sycl_std::__invoke_Dot<double>(p0, p1);
1166 template <
typename T>
1167 detail::enable_if_t<detail::is_vgengeohalf<T>::value,
half> dot(T p0,
1169 return __sycl_std::__invoke_Dot<half>(p0, p1);
// distance (float result): constrained through a defaulted template
// parameter, so substitution fails unless detail::is_gengeofloat<T>::value is
// true. Whatever T is, the return type is float; (p0, p1) go to
// __sycl_std::__invoke_distance<float>.
1173 template <
typename T,
1174 typename = detail::enable_if_t<detail::is_gengeofloat<T>::value, T>>
1175 float distance(T p0, T p1)
__NOEXC {
1176 return __sycl_std::__invoke_distance<float>(p0, p1);
// distance (double result): constrained through a defaulted template
// parameter, so substitution fails unless detail::is_gengeodouble<T>::value is
// true. Whatever T is, the return type is double; (p0, p1) go to
// __sycl_std::__invoke_distance<double>.
1180 template <
typename T,
1181 typename = detail::enable_if_t<detail::is_gengeodouble<T>::value, T>>
1182 double distance(T p0, T p1)
__NOEXC {
1183 return __sycl_std::__invoke_distance<double>(p0, p1);
1187 template <
typename T,
1188 typename = detail::enable_if_t<detail::is_gengeohalf<T>::value, T>>
1190 return __sycl_std::__invoke_distance<half>(p0, p1);
1194 template <
typename T,
1195 typename = detail::enable_if_t<detail::is_gengeofloat<T>::value, T>>
1197 return __sycl_std::__invoke_length<float>(p);
1201 template <
typename T,
1202 typename = detail::enable_if_t<detail::is_gengeodouble<T>::value, T>>
1204 return __sycl_std::__invoke_length<double>(p);
1208 template <
typename T,
1209 typename = detail::enable_if_t<detail::is_gengeohalf<T>::value, T>>
1211 return __sycl_std::__invoke_length<half>(p);
1215 template <
typename T>
1216 detail::enable_if_t<detail::is_gengeofloat<T>::value, T>
1218 return __sycl_std::__invoke_normalize<T>(p);
1222 template <
typename T>
1223 detail::enable_if_t<detail::is_gengeodouble<T>::value, T>
1225 return __sycl_std::__invoke_normalize<T>(p);
// normalize (half-based geometric types): enabled only when
// detail::is_gengeohalf<T>::value is true. Forwards p to
// __sycl_std::__invoke_normalize<T> and returns the result as T.
1229 template <
typename T>
1230 detail::enable_if_t<detail::is_gengeohalf<T>::value, T> normalize(T p)
__NOEXC {
1231 return __sycl_std::__invoke_normalize<T>(p);
// fast_distance (float result): constrained through a defaulted template
// parameter, so substitution fails unless detail::is_gengeofloat<T>::value is
// true. Returns float; (p0, p1) go to
// __sycl_std::__invoke_fast_distance<float>.
1235 template <
typename T,
1236 typename = detail::enable_if_t<detail::is_gengeofloat<T>::value, T>>
1237 float fast_distance(T p0, T p1)
__NOEXC {
1238 return __sycl_std::__invoke_fast_distance<float>(p0, p1);
// fast_distance (double result): constrained through a defaulted template
// parameter, so substitution fails unless detail::is_gengeodouble<T>::value is
// true. Returns double; (p0, p1) go to
// __sycl_std::__invoke_fast_distance<double>.
1242 template <
typename T,
1243 typename = detail::enable_if_t<detail::is_gengeodouble<T>::value, T>>
1244 double fast_distance(T p0, T p1)
__NOEXC {
1245 return __sycl_std::__invoke_fast_distance<double>(p0, p1);
// fast_length (float result): constrained through a defaulted template
// parameter, so substitution fails unless detail::is_gengeofloat<T>::value is
// true. Returns float; p goes to __sycl_std::__invoke_fast_length<float>.
1249 template <
typename T,
1250 typename = detail::enable_if_t<detail::is_gengeofloat<T>::value, T>>
1251 float fast_length(T p)
__NOEXC {
1252 return __sycl_std::__invoke_fast_length<float>(p);
// fast_length (double result): constrained through a defaulted template
// parameter, so substitution fails unless detail::is_gengeodouble<T>::value is
// true. Returns double; p goes to __sycl_std::__invoke_fast_length<double>.
1256 template <
typename T,
1257 typename = detail::enable_if_t<detail::is_gengeodouble<T>::value, T>>
1258 double fast_length(T p)
__NOEXC {
1259 return __sycl_std::__invoke_fast_length<double>(p);
1263 template <
typename T>
1264 detail::enable_if_t<detail::is_gengeofloat<T>::value, T>
1266 return __sycl_std::__invoke_fast_normalize<T>(p);
1270 template <
typename T>
1271 detail::enable_if_t<detail::is_gengeodouble<T>::value, T>
1273 return __sycl_std::__invoke_fast_normalize<T>(p);
// isequal: constrained through a defaulted template parameter, so
// substitution fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_FOrdEqual (instantiated for
// detail::internal_rel_ret_t<T>) on (x, y), then passes that value through
// detail::RelConverter<T>::apply to obtain detail::common_rel_ret_t<T>.
1279 template <
typename T,
1280 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1281 detail::common_rel_ret_t<T> isequal(T x, T y)
__NOEXC {
1282 return detail::RelConverter<T>::apply(
1283 __sycl_std::__invoke_FOrdEqual<detail::internal_rel_ret_t<T>>(x, y));
// isnotequal: constrained through a defaulted template parameter, so
// substitution fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_FUnordNotEqual (instantiated for
// detail::internal_rel_ret_t<T>) on (x, y), then passes that value through
// detail::RelConverter<T>::apply to obtain detail::common_rel_ret_t<T>.
1286 template <
typename T,
1287 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1288 detail::common_rel_ret_t<T> isnotequal(T x, T y)
__NOEXC {
1289 return detail::RelConverter<T>::apply(
1290 __sycl_std::__invoke_FUnordNotEqual<detail::internal_rel_ret_t<T>>(x, y));
1293 template <
typename T,
1294 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1295 detail::common_rel_ret_t<T> isgreater(T x, T y)
__NOEXC {
1296 return detail::RelConverter<T>::apply(
1297 __sycl_std::__invoke_FOrdGreaterThan<detail::internal_rel_ret_t<T>>(x,
1301 template <
typename T,
1302 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1303 detail::common_rel_ret_t<T> isgreaterequal(T x, T y)
__NOEXC {
1304 return detail::RelConverter<T>::apply(
1305 __sycl_std::__invoke_FOrdGreaterThanEqual<detail::internal_rel_ret_t<T>>(
// isless: constrained through a defaulted template parameter, so substitution
// fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_FOrdLessThan (instantiated for
// detail::internal_rel_ret_t<T>) on (x, y), then passes that value through
// detail::RelConverter<T>::apply to obtain detail::common_rel_ret_t<T>.
1309 template <
typename T,
1310 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1311 detail::common_rel_ret_t<T> isless(T x, T y)
__NOEXC {
1312 return detail::RelConverter<T>::apply(
1313 __sycl_std::__invoke_FOrdLessThan<detail::internal_rel_ret_t<T>>(x, y));
1316 template <
typename T,
1317 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1318 detail::common_rel_ret_t<T> islessequal(T x, T y)
__NOEXC {
1319 return detail::RelConverter<T>::apply(
1320 __sycl_std::__invoke_FOrdLessThanEqual<detail::internal_rel_ret_t<T>>(x,
// islessgreater: constrained through a defaulted template parameter, so
// substitution fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_FOrdNotEqual (instantiated for
// detail::internal_rel_ret_t<T>) on (x, y), then passes that value through
// detail::RelConverter<T>::apply to obtain detail::common_rel_ret_t<T>.
1324 template <
typename T,
1325 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1326 detail::common_rel_ret_t<T> islessgreater(T x, T y)
__NOEXC {
1327 return detail::RelConverter<T>::apply(
1328 __sycl_std::__invoke_FOrdNotEqual<detail::internal_rel_ret_t<T>>(x, y));
// isfinite: constrained through a defaulted template parameter, so
// substitution fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_IsFinite (instantiated for
// detail::internal_rel_ret_t<T>) on x, then passes that value through
// detail::RelConverter<T>::apply to obtain detail::common_rel_ret_t<T>.
1331 template <
typename T,
1332 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1333 detail::common_rel_ret_t<T> isfinite(T x)
__NOEXC {
1334 return detail::RelConverter<T>::apply(
1335 __sycl_std::__invoke_IsFinite<detail::internal_rel_ret_t<T>>(x));
// isinf: constrained through a defaulted template parameter, so substitution
// fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_IsInf (instantiated for detail::internal_rel_ret_t<T>)
// on x, then passes that value through detail::RelConverter<T>::apply to
// obtain detail::common_rel_ret_t<T>.
1338 template <
typename T,
1339 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1340 detail::common_rel_ret_t<T> isinf(T x)
__NOEXC {
1341 return detail::RelConverter<T>::apply(
1342 __sycl_std::__invoke_IsInf<detail::internal_rel_ret_t<T>>(x));
1345 template <
typename T,
1346 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1348 return detail::RelConverter<T>::apply(
1349 __sycl_std::__invoke_IsNan<detail::internal_rel_ret_t<T>>(x));
// isnormal: constrained through a defaulted template parameter, so
// substitution fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_IsNormal (instantiated for
// detail::internal_rel_ret_t<T>) on x, then passes that value through
// detail::RelConverter<T>::apply to obtain detail::common_rel_ret_t<T>.
1352 template <
typename T,
1353 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1354 detail::common_rel_ret_t<T> isnormal(T x)
__NOEXC {
1355 return detail::RelConverter<T>::apply(
1356 __sycl_std::__invoke_IsNormal<detail::internal_rel_ret_t<T>>(x));
// isordered: constrained through a defaulted template parameter, so
// substitution fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_Ordered (instantiated for
// detail::internal_rel_ret_t<T>) on (x, y), then passes that value through
// detail::RelConverter<T>::apply to obtain detail::common_rel_ret_t<T>.
1359 template <
typename T,
1360 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1361 detail::common_rel_ret_t<T> isordered(T x, T y)
__NOEXC {
1362 return detail::RelConverter<T>::apply(
1363 __sycl_std::__invoke_Ordered<detail::internal_rel_ret_t<T>>(x, y));
// isunordered: constrained through a defaulted template parameter, so
// substitution fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_Unordered (instantiated for
// detail::internal_rel_ret_t<T>) on (x, y), then passes that value through
// detail::RelConverter<T>::apply to obtain detail::common_rel_ret_t<T>.
1366 template <
typename T,
1367 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1368 detail::common_rel_ret_t<T> isunordered(T x, T y)
__NOEXC {
1369 return detail::RelConverter<T>::apply(
1370 __sycl_std::__invoke_Unordered<detail::internal_rel_ret_t<T>>(x, y));
// signbit: constrained through a defaulted template parameter, so
// substitution fails unless detail::is_svgenfloat<T>::value is true. Calls
// __sycl_std::__invoke_SignBitSet (instantiated for
// detail::internal_rel_ret_t<T>) on x, then passes that value through
// detail::RelConverter<T>::apply to obtain detail::common_rel_ret_t<T>.
1373 template <
typename T,
1374 typename = detail::enable_if_t<detail::is_svgenfloat<T>::value, T>>
1375 detail::common_rel_ret_t<T> signbit(T x)
__NOEXC {
1376 return detail::RelConverter<T>::apply(
1377 __sycl_std::__invoke_SignBitSet<detail::internal_rel_ret_t<T>>(x));
1381 #if defined(SYCL2020_CONFORMANT_APIS) && SYCL_LANGUAGE_VERSION >= 202001
1382 using anyall_ret_t = bool;
1384 using anyall_ret_t = int;
1389 template <
typename T>
1390 detail::enable_if_t<detail::is_sigeninteger<T>::value, detail::anyall_ret_t>
1396 template <
typename T>
1397 detail::enable_if_t<detail::is_vigeninteger<T>::value, detail::anyall_ret_t>
1399 return detail::rel_sign_bit_test_ret_t<T>(
1400 __sycl_std::__invoke_Any<detail::rel_sign_bit_test_ret_t<T>>(
1401 detail::rel_sign_bit_test_arg_t<T>(x)));
1405 template <
typename T>
1406 detail::enable_if_t<detail::is_sigeninteger<T>::value, detail::anyall_ret_t>
1412 template <
typename T>
1413 detail::enable_if_t<detail::is_vigeninteger<T>::value, detail::anyall_ret_t>
1415 return detail::rel_sign_bit_test_ret_t<T>(
1416 __sycl_std::__invoke_All<detail::rel_sign_bit_test_ret_t<T>>(
1417 detail::rel_sign_bit_test_arg_t<T>(x)));
1421 template <
typename T>
1422 detail::enable_if_t<detail::is_gentype<T>::value, T> bitselect(T a, T b,
1424 return __sycl_std::__invoke_bitselect<T>(a, b, c);
1428 template <
typename T>
1429 detail::enable_if_t<detail::is_sgentype<T>::value, T> select(T a, T b,
1431 return __sycl_std::__invoke_select<T>(a, b,
static_cast<int>(c));
1435 template <
typename T,
typename T2>
1437 detail::is_geninteger<T>::value && detail::is_igeninteger<T2>::value, T>
1438 select(T a, T b, T2 c)
__NOEXC {
1439 detail::check_vector_size<T, T2>();
1440 return __sycl_std::__invoke_select<T>(a, b, c);
1444 template <
typename T,
typename T2>
1446 detail::is_geninteger<T>::value && detail::is_ugeninteger<T2>::value, T>
1447 select(T a, T b, T2 c)
__NOEXC {
1448 detail::check_vector_size<T, T2>();
1449 return __sycl_std::__invoke_select<T>(a, b, c);
1453 template <
typename T,
typename T2>
1455 detail::is_svgenfloatf<T>::value && detail::is_genint<T2>::value, T>
1456 select(T a, T b, T2 c)
__NOEXC {
1457 detail::check_vector_size<T, T2>();
1458 return __sycl_std::__invoke_select<T>(a, b, c);
1462 template <
typename T,
typename T2>
1464 detail::is_svgenfloatf<T>::value && detail::is_ugenint<T2>::value, T>
1465 select(T a, T b, T2 c)
__NOEXC {
1466 detail::check_vector_size<T, T2>();
1467 return __sycl_std::__invoke_select<T>(a, b, c);
1471 template <
typename T,
typename T2>
1472 detail::enable_if_t<detail::is_svgenfloatd<T>::value &&
1473 detail::is_igeninteger64bit<T2>::value,
1475 select(T a, T b, T2 c)
__NOEXC {
1476 detail::check_vector_size<T, T2>();
1477 return __sycl_std::__invoke_select<T>(a, b, c);
1481 template <
typename T,
typename T2>
1482 detail::enable_if_t<detail::is_svgenfloatd<T>::value &&
1483 detail::is_ugeninteger64bit<T2>::value,
1485 select(T a, T b, T2 c)
__NOEXC {
1486 detail::check_vector_size<T, T2>();
1487 return __sycl_std::__invoke_select<T>(a, b, c);
1491 template <
typename T,
typename T2>
1492 detail::enable_if_t<detail::is_svgenfloath<T>::value &&
1493 detail::is_igeninteger16bit<T2>::value,
1495 select(T a, T b, T2 c)
__NOEXC {
1496 detail::check_vector_size<T, T2>();
1497 return __sycl_std::__invoke_select<T>(a, b, c);
1501 template <
typename T,
typename T2>
1502 detail::enable_if_t<detail::is_svgenfloath<T>::value &&
1503 detail::is_ugeninteger16bit<T2>::value,
1505 select(T a, T b, T2 c)
__NOEXC {
1506 detail::check_vector_size<T, T2>();
1507 return __sycl_std::__invoke_select<T>(a, b, c);
1513 #define __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(NAME) \
1514 template <size_t N> \
1515 inline __SYCL_ALWAYS_INLINE marray<float, N> NAME(marray<float, N> x) \
1517 marray<float, N> res; \
1518 for (size_t i = 0; i < N / 2; i++) { \
1519 auto partial_res = __sycl_std::__invoke_native_##NAME<vec<float, 2>>( \
1520 detail::to_vec2(x, i * 2)); \
1521 std::memcpy(&res[i * 2], &partial_res, sizeof(vec<float, 2>)); \
1524 res[N - 1] = __sycl_std::__invoke_native_##NAME<float>(x[N - 1]); \
1542 #undef __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD
1544 #define __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(NAME) \
1545 template <size_t N> \
1546 inline __SYCL_ALWAYS_INLINE marray<float, N> NAME( \
1547 marray<float, N> x, marray<float, N> y) __NOEXC { \
1548 marray<float, N> res; \
1549 for (size_t i = 0; i < N / 2; i++) { \
1550 auto partial_res = __sycl_std::__invoke_native_##NAME<vec<float, 2>>( \
1551 detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \
1552 std::memcpy(&res[i * 2], &partial_res, sizeof(vec<float, 2>)); \
1556 __sycl_std::__invoke_native_##NAME<float>(x[N - 1], y[N - 1]); \
1564 #undef __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD
1567 template <
typename T>
1568 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
cos(T x)
__NOEXC {
1569 return __sycl_std::__invoke_native_cos<T>(x);
1573 template <
typename T>
1574 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> divide(T x,
1576 return __sycl_std::__invoke_native_divide<T>(x, y);
1580 template <
typename T>
1581 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
exp(T x)
__NOEXC {
1582 return __sycl_std::__invoke_native_exp<T>(x);
1586 template <
typename T>
1587 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
exp2(T x)
__NOEXC {
1588 return __sycl_std::__invoke_native_exp2<T>(x);
1592 template <
typename T>
1593 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> exp10(T x)
__NOEXC {
1594 return __sycl_std::__invoke_native_exp10<T>(x);
1598 template <
typename T>
1599 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
log(T x)
__NOEXC {
1600 return __sycl_std::__invoke_native_log<T>(x);
1604 template <
typename T>
1605 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
log2(T x)
__NOEXC {
1606 return __sycl_std::__invoke_native_log2<T>(x);
1610 template <
typename T>
1611 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> log10(T x)
__NOEXC {
1612 return __sycl_std::__invoke_native_log10<T>(x);
1616 template <
typename T>
1617 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
powr(T x,
1619 return __sycl_std::__invoke_native_powr<T>(x, y);
1623 template <
typename T>
1624 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> recip(T x)
__NOEXC {
1625 return __sycl_std::__invoke_native_recip<T>(x);
1629 template <
typename T>
1630 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
rsqrt(T x)
__NOEXC {
1631 return __sycl_std::__invoke_native_rsqrt<T>(x);
1635 template <
typename T>
1636 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
sin(T x)
__NOEXC {
1637 return __sycl_std::__invoke_native_sin<T>(x);
1641 template <
typename T>
1642 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
sqrt(T x)
__NOEXC {
1643 return __sycl_std::__invoke_native_sqrt<T>(x);
1647 template <
typename T>
1648 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> tan(T x)
__NOEXC {
1649 return __sycl_std::__invoke_native_tan<T>(x);
1653 namespace half_precision {
1655 #define __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(NAME) \
1656 template <size_t N> \
1657 inline __SYCL_ALWAYS_INLINE marray<float, N> NAME(marray<float, N> x) \
1659 marray<float, N> res; \
1660 for (size_t i = 0; i < N / 2; i++) { \
1661 auto partial_res = __sycl_std::__invoke_half_##NAME<vec<float, 2>>( \
1662 detail::to_vec2(x, i * 2)); \
1663 std::memcpy(&res[i * 2], &partial_res, sizeof(vec<float, 2>)); \
1666 res[N - 1] = __sycl_std::__invoke_half_##NAME<float>(x[N - 1]); \
1683 #undef __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD
1685 #define __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(NAME) \
1686 template <size_t N> \
1687 inline __SYCL_ALWAYS_INLINE marray<float, N> NAME( \
1688 marray<float, N> x, marray<float, N> y) __NOEXC { \
1689 marray<float, N> res; \
1690 for (size_t i = 0; i < N / 2; i++) { \
1691 auto partial_res = __sycl_std::__invoke_half_##NAME<vec<float, 2>>( \
1692 detail::to_vec2(x, i * 2), detail::to_vec2(y, i * 2)); \
1693 std::memcpy(&res[i * 2], &partial_res, sizeof(vec<float, 2>)); \
1697 __sycl_std::__invoke_half_##NAME<float>(x[N - 1], y[N - 1]); \
1705 #undef __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD
1708 template <
typename T>
1709 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
cos(T x)
__NOEXC {
1710 return __sycl_std::__invoke_half_cos<T>(x);
1714 template <
typename T>
1715 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> divide(T x,
1717 return __sycl_std::__invoke_half_divide<T>(x, y);
1721 template <
typename T>
1722 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
exp(T x)
__NOEXC {
1723 return __sycl_std::__invoke_half_exp<T>(x);
1727 template <
typename T>
1728 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
exp2(T x)
__NOEXC {
1729 return __sycl_std::__invoke_half_exp2<T>(x);
1733 template <
typename T>
1734 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> exp10(T x)
__NOEXC {
1735 return __sycl_std::__invoke_half_exp10<T>(x);
1739 template <
typename T>
1740 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
log(T x)
__NOEXC {
1741 return __sycl_std::__invoke_half_log<T>(x);
1745 template <
typename T>
1746 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
log2(T x)
__NOEXC {
1747 return __sycl_std::__invoke_half_log2<T>(x);
1751 template <
typename T>
1752 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> log10(T x)
__NOEXC {
1753 return __sycl_std::__invoke_half_log10<T>(x);
1757 template <
typename T>
1758 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
powr(T x,
1760 return __sycl_std::__invoke_half_powr<T>(x, y);
1764 template <
typename T>
1765 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> recip(T x)
__NOEXC {
1766 return __sycl_std::__invoke_half_recip<T>(x);
1770 template <
typename T>
1771 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
rsqrt(T x)
__NOEXC {
1772 return __sycl_std::__invoke_half_rsqrt<T>(x);
1776 template <
typename T>
1777 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
sin(T x)
__NOEXC {
1778 return __sycl_std::__invoke_half_sin<T>(x);
1782 template <
typename T>
1783 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
sqrt(T x)
__NOEXC {
1784 return __sycl_std::__invoke_half_sqrt<T>(x);
1788 template <
typename T>
1789 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> tan(T x)
__NOEXC {
1790 return __sycl_std::__invoke_half_tan<T>(x);
1795 #ifdef __FAST_MATH__
1798 #define __SYCL_MATH_FUNCTION_OVERLOAD_FM(NAME) \
1799 template <typename T, size_t N> \
1800 inline __SYCL_ALWAYS_INLINE \
1801 std::enable_if_t<std::is_same_v<T, float>, marray<T, N>> \
1802 NAME(marray<T, N> x) __NOEXC { \
1803 return native::NAME(x); \
1817 #undef __SYCL_MATH_FUNCTION_OVERLOAD_FM
1819 template <
typename T,
size_t N>
1821 std::enable_if_t<std::is_same_v<T, float>, marray<T, N>>
1827 template <
typename T>
1828 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
cos(T x)
__NOEXC {
1833 template <
typename T>
1834 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
exp(T x)
__NOEXC {
1839 template <
typename T>
1840 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
exp2(T x)
__NOEXC {
1845 template <
typename T>
1846 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> exp10(T x)
__NOEXC {
1847 return native::exp10(x);
1851 template <
typename T>
1852 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
log(T x)
__NOEXC {
1857 template <
typename T>
1858 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
log2(T x)
__NOEXC {
1863 template <
typename T>
1864 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> log10(T x)
__NOEXC {
1865 return native::log10(x);
1869 template <
typename T>
1870 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
powr(T x,
1876 template <
typename T>
1877 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
rsqrt(T x)
__NOEXC {
1882 template <
typename T>
1883 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
sin(T x)
__NOEXC {
1888 template <
typename T>
1889 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T>
sqrt(T x)
__NOEXC {
1894 template <
typename T>
1895 detail::enable_if_t<detail::is_svgenfloatf<T>::value, T> tan(T x)
__NOEXC {
1896 return native::tan(x);
1903 #ifdef __SYCL_DEVICE_ONLY__
// Prototypes for C-library style division / floating-point decomposition
// routines. Only declarations appear here; the definitions are not visible in
// this excerpt (presumably supplied by the device library at link time —
// confirm).
// lldiv: two long long operands, result returned as an lldiv_t.
1911 extern SYCL_EXTERNAL lldiv_t lldiv(
long long int x,
long long int y);
// modf: double input plus a double* (intpart) that, per ISO C, receives the
// integral part.
1921 extern SYCL_EXTERNAL double modf(
double x,
double *intpart);
// remquof / remquo: float and double variants with the same shape; q is an
// int* that, per ISO C, receives quotient bits.
1950 extern SYCL_EXTERNAL float remquof(
float x,
float y,
int *q);
1951 extern SYCL_EXTERNAL double remquo(
double x,
double y,
int *q);
// Prototypes for the C memory routines memset and memcmp, with the standard
// (pointer, value/pointer, byte count) parameter shapes. Declarations only;
// the definitions are not visible in this excerpt.
1992 extern SYCL_EXTERNAL void *memset(
void *dest,
int c,
size_t n);
1993 extern SYCL_EXTERNAL int memcmp(
const void *s1,
const void *s2,
size_t n);
1994 extern SYCL_EXTERNAL long long int __imf_llmax(
long long int x,
1996 extern SYCL_EXTERNAL long long int __imf_llmin(
long long int x,
1999 __imf_ullmax(
unsigned long long int x,
unsigned long long int y);
2001 __imf_ullmin(
unsigned long long int x,
unsigned long long int y);
// Unsigned 32-bit integer helpers from the __imf_ family (declarations only).
// __imf_umax / __imf_umin take two unsigned int operands; names suggest
// maximum / minimum — TODO confirm against the device library documentation.
2002 extern SYCL_EXTERNAL unsigned int __imf_umax(
unsigned int x,
unsigned int y);
2003 extern SYCL_EXTERNAL unsigned int __imf_umin(
unsigned int x,
unsigned int y);
// __imf_brev: single unsigned int operand; presumably bit reversal — confirm.
2004 extern SYCL_EXTERNAL unsigned int __imf_brev(
unsigned int x);
2006 __imf_brevll(
unsigned long long int x);
2008 __imf_byte_perm(
unsigned int x,
unsigned int y,
unsigned int s);
// __imf_popcll: takes a 64-bit unsigned value and returns int; presumably a
// population count — confirm.
2014 extern SYCL_EXTERNAL int __imf_popcll(
unsigned long long int x);
// __imf_sad: two signed int operands plus an unsigned int z, returning
// unsigned int; presumably |x - y| + z (sum of absolute differences) —
// TODO confirm.
2015 extern SYCL_EXTERNAL unsigned int __imf_sad(
int x,
int y,
unsigned int z);
2016 extern SYCL_EXTERNAL unsigned int __imf_usad(
unsigned int x,
unsigned int y,
// Unsigned 32-bit binary helpers (declarations only). All four take and
// return unsigned int.
// NOTE(review): names suggest rounded halving add (urhadd), halving add
// (uhadd), 24-bit multiply (umul24) and high half of the 64-bit product
// (umulhi) — confirm against the device library documentation.
2019 extern SYCL_EXTERNAL unsigned int __imf_urhadd(
unsigned int x,
unsigned int y);
2020 extern SYCL_EXTERNAL unsigned int __imf_uhadd(
unsigned int x,
unsigned int y);
2022 extern SYCL_EXTERNAL unsigned int __imf_umul24(
unsigned int x,
unsigned int y);
2024 extern SYCL_EXTERNAL unsigned int __imf_umulhi(
unsigned int x,
unsigned int y);
2025 extern SYCL_EXTERNAL long long int __imf_mul64hi(
long long int x,
2028 __imf_umul64hi(
unsigned long long int x,
unsigned long long int y);
// __imf_fmaf: three float operands, float result; presumably a fused
// multiply-add x * y + z — confirm. Declaration only.
2030 extern SYCL_EXTERNAL float __imf_fmaf(
float x,
float y,
float z);
// float -> integer conversion entry points (declarations only), one group per
// destination type: unsigned int, long long, unsigned long long.
// NOTE(review): the _rd/_rn/_ru/_rz suffixes presumably select the rounding
// mode (toward -inf / to nearest / toward +inf / toward zero) — confirm
// against the device library documentation.
2047 extern SYCL_EXTERNAL unsigned int __imf_float2uint_rd(
float x);
2048 extern SYCL_EXTERNAL unsigned int __imf_float2uint_rn(
float x);
2049 extern SYCL_EXTERNAL unsigned int __imf_float2uint_ru(
float x);
2050 extern SYCL_EXTERNAL unsigned int __imf_float2uint_rz(
float x);
2051 extern SYCL_EXTERNAL long long int __imf_float2ll_rd(
float x);
2052 extern SYCL_EXTERNAL long long int __imf_float2ll_rn(
float x);
2053 extern SYCL_EXTERNAL long long int __imf_float2ll_ru(
float x);
2054 extern SYCL_EXTERNAL long long int __imf_float2ll_rz(
float x);
2055 extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_rd(
float x);
2056 extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_rn(
float x);
2057 extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_ru(
float x);
2058 extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_rz(
float x);
// The "_as_" name suggests a bit-pattern reinterpretation rather than a value
// conversion — TODO confirm.
2060 extern SYCL_EXTERNAL unsigned int __imf_float_as_uint(
float x);
// integer -> float conversion entry points (declarations only), one group per
// source type: long long, unsigned int, unsigned long long.
// NOTE(review): the _rd/_rn/_ru/_rz suffixes presumably select the rounding
// mode (toward -inf / to nearest / toward +inf / toward zero) — confirm.
2066 extern SYCL_EXTERNAL float __imf_ll2float_rd(
long long int x);
2067 extern SYCL_EXTERNAL float __imf_ll2float_rn(
long long int x);
2068 extern SYCL_EXTERNAL float __imf_ll2float_ru(
long long int x);
2069 extern SYCL_EXTERNAL float __imf_ll2float_rz(
long long int x);
2070 extern SYCL_EXTERNAL float __imf_uint2float_rd(
unsigned int x);
2071 extern SYCL_EXTERNAL float __imf_uint2float_rn(
unsigned int x);
2072 extern SYCL_EXTERNAL float __imf_uint2float_ru(
unsigned int x);
2073 extern SYCL_EXTERNAL float __imf_uint2float_rz(
unsigned int x);
// The "_as_" name suggests a bit-pattern reinterpretation rather than a value
// conversion — TODO confirm.
2074 extern SYCL_EXTERNAL float __imf_uint_as_float(
unsigned int x);
2075 extern SYCL_EXTERNAL float __imf_ull2float_rd(
unsigned long long int x);
2076 extern SYCL_EXTERNAL float __imf_ull2float_rn(
unsigned long long int x);
2077 extern SYCL_EXTERNAL float __imf_ull2float_ru(
unsigned long long int x);
2078 extern SYCL_EXTERNAL float __imf_ull2float_rz(
unsigned long long int x);
// half (_Float16) -> integer conversion entry points (declarations only), for
// long long, unsigned int, unsigned long long and unsigned short results.
// NOTE(review): the _rd/_rn/_ru/_rz suffixes presumably select the rounding
// mode (toward -inf / to nearest / toward +inf / toward zero) — confirm.
2088 extern SYCL_EXTERNAL long long __imf_half2ll_rd(_Float16 x);
2089 extern SYCL_EXTERNAL long long __imf_half2ll_rn(_Float16 x);
2090 extern SYCL_EXTERNAL long long __imf_half2ll_ru(_Float16 x);
2091 extern SYCL_EXTERNAL long long __imf_half2ll_rz(_Float16 x);
2096 extern SYCL_EXTERNAL unsigned int __imf_half2uint_rd(_Float16 x);
2097 extern SYCL_EXTERNAL unsigned int __imf_half2uint_rn(_Float16 x);
2098 extern SYCL_EXTERNAL unsigned int __imf_half2uint_ru(_Float16 x);
2099 extern SYCL_EXTERNAL unsigned int __imf_half2uint_rz(_Float16 x);
2100 extern SYCL_EXTERNAL unsigned long long __imf_half2ull_rd(_Float16 x);
2101 extern SYCL_EXTERNAL unsigned long long __imf_half2ull_rn(_Float16 x);
2102 extern SYCL_EXTERNAL unsigned long long __imf_half2ull_ru(_Float16 x);
2103 extern SYCL_EXTERNAL unsigned long long __imf_half2ull_rz(_Float16 x);
2104 extern SYCL_EXTERNAL unsigned short __imf_half2ushort_rd(_Float16 x);
2105 extern SYCL_EXTERNAL unsigned short __imf_half2ushort_rn(_Float16 x);
2106 extern SYCL_EXTERNAL unsigned short __imf_half2ushort_ru(_Float16 x);
2107 extern SYCL_EXTERNAL unsigned short __imf_half2ushort_rz(_Float16 x);
// The "_as_" name suggests a bit-pattern reinterpretation rather than a value
// conversion — TODO confirm.
2109 extern SYCL_EXTERNAL unsigned short __imf_half_as_ushort(_Float16 x);
// integer -> half (_Float16) conversion entry points (declarations only), for
// long long, unsigned int, unsigned long long and unsigned short sources.
// NOTE(review): the _rd/_rn/_ru/_rz suffixes presumably select the rounding
// mode (toward -inf / to nearest / toward +inf / toward zero) — confirm.
2114 extern SYCL_EXTERNAL _Float16 __imf_ll2half_rd(
long long x);
2115 extern SYCL_EXTERNAL _Float16 __imf_ll2half_rn(
long long x);
2116 extern SYCL_EXTERNAL _Float16 __imf_ll2half_ru(
long long x);
2117 extern SYCL_EXTERNAL _Float16 __imf_ll2half_rz(
long long x);
2123 extern SYCL_EXTERNAL _Float16 __imf_uint2half_rd(
unsigned int x);
2124 extern SYCL_EXTERNAL _Float16 __imf_uint2half_rn(
unsigned int x);
2125 extern SYCL_EXTERNAL _Float16 __imf_uint2half_ru(
unsigned int x);
2126 extern SYCL_EXTERNAL _Float16 __imf_uint2half_rz(
unsigned int x);
2127 extern SYCL_EXTERNAL _Float16 __imf_ull2half_rd(
unsigned long long x);
2128 extern SYCL_EXTERNAL _Float16 __imf_ull2half_rn(
unsigned long long x);
2129 extern SYCL_EXTERNAL _Float16 __imf_ull2half_ru(
unsigned long long x);
2130 extern SYCL_EXTERNAL _Float16 __imf_ull2half_rz(
unsigned long long x);
2131 extern SYCL_EXTERNAL _Float16 __imf_ushort2half_rd(
unsigned short x);
2132 extern SYCL_EXTERNAL _Float16 __imf_ushort2half_rn(
unsigned short x);
2133 extern SYCL_EXTERNAL _Float16 __imf_ushort2half_ru(
unsigned short x);
2134 extern SYCL_EXTERNAL _Float16 __imf_ushort2half_rz(
unsigned short x);
// The "_as_" name suggests a bit-pattern reinterpretation rather than a value
// conversion — TODO confirm.
2135 extern SYCL_EXTERNAL _Float16 __imf_ushort_as_half(
unsigned short x);
// half (_Float16) arithmetic helpers (declarations only). All operands and
// results are _Float16.
// NOTE(review): names suggest fused multiply-add, round-to-nearest-integer,
// maximum and minimum respectively — confirm.
2138 extern SYCL_EXTERNAL _Float16 __imf_fmaf16(_Float16 x, _Float16 y, _Float16 z);
2144 extern SYCL_EXTERNAL _Float16 __imf_nearbyintf16(_Float16 x);
2148 extern SYCL_EXTERNAL _Float16 __imf_fmaxf16(_Float16 x, _Float16 y);
2149 extern SYCL_EXTERNAL _Float16 __imf_fminf16(_Float16 x, _Float16 y);
// bfloat16 -> other type conversion entry points (declarations only). In these
// signatures the bfloat16 operand is typed uint16_t (presumably the raw
// 16-bit encoding — confirm).
// NOTE(review): the _rd/_rn/_ru/_rz suffixes presumably select the rounding
// mode (toward -inf / to nearest / toward +inf / toward zero) — confirm.
2152 extern SYCL_EXTERNAL float __imf_bfloat162float(uint16_t x);
2153 extern SYCL_EXTERNAL unsigned int __imf_bfloat162uint_rd(uint16_t x);
2154 extern SYCL_EXTERNAL unsigned int __imf_bfloat162uint_rn(uint16_t x);
2155 extern SYCL_EXTERNAL unsigned int __imf_bfloat162uint_ru(uint16_t x);
2156 extern SYCL_EXTERNAL unsigned int __imf_bfloat162uint_rz(uint16_t x);
2157 extern SYCL_EXTERNAL unsigned short __imf_bfloat162ushort_rd(uint16_t x);
2158 extern SYCL_EXTERNAL unsigned short __imf_bfloat162ushort_rn(uint16_t x);
2159 extern SYCL_EXTERNAL unsigned short __imf_bfloat162ushort_ru(uint16_t x);
2160 extern SYCL_EXTERNAL unsigned short __imf_bfloat162ushort_rz(uint16_t x);
2161 extern SYCL_EXTERNAL unsigned long long __imf_bfloat162ull_rd(uint16_t x);
2162 extern SYCL_EXTERNAL unsigned long long __imf_bfloat162ull_rn(uint16_t x);
2163 extern SYCL_EXTERNAL unsigned long long __imf_bfloat162ull_ru(uint16_t x);
2164 extern SYCL_EXTERNAL unsigned long long __imf_bfloat162ull_rz(uint16_t x);
2169 extern SYCL_EXTERNAL short __imf_bfloat162short_rd(uint16_t x);
2170 extern SYCL_EXTERNAL short __imf_bfloat162short_rn(uint16_t x);
2171 extern SYCL_EXTERNAL short __imf_bfloat162short_ru(uint16_t x);
2172 extern SYCL_EXTERNAL short __imf_bfloat162short_rz(uint16_t x);
2173 extern SYCL_EXTERNAL long long __imf_bfloat162ll_rd(uint16_t x);
2174 extern SYCL_EXTERNAL long long __imf_bfloat162ll_rn(uint16_t x);
2175 extern SYCL_EXTERNAL long long __imf_bfloat162ll_ru(uint16_t x);
2176 extern SYCL_EXTERNAL long long __imf_bfloat162ll_rz(uint16_t x);
// other type -> bfloat16 conversion entry points (declarations only). The
// bfloat16 result is typed uint16_t in these signatures (presumably the raw
// 16-bit encoding — confirm). Sources: float, unsigned short, unsigned int,
// unsigned long long, short, long long.
// NOTE(review): the _rd/_rn/_ru/_rz suffixes presumably select the rounding
// mode; the rounding used by the unsuffixed __imf_float2bfloat16 cannot be
// determined from this declaration — confirm against the documentation.
2177 extern SYCL_EXTERNAL uint16_t __imf_float2bfloat16(
float x);
2178 extern SYCL_EXTERNAL uint16_t __imf_float2bfloat16_rd(
float x);
2179 extern SYCL_EXTERNAL uint16_t __imf_float2bfloat16_rn(
float x);
2180 extern SYCL_EXTERNAL uint16_t __imf_float2bfloat16_ru(
float x);
2181 extern SYCL_EXTERNAL uint16_t __imf_float2bfloat16_rz(
float x);
2182 extern SYCL_EXTERNAL uint16_t __imf_ushort2bfloat16_rd(
unsigned short x);
2183 extern SYCL_EXTERNAL uint16_t __imf_ushort2bfloat16_rn(
unsigned short x);
2184 extern SYCL_EXTERNAL uint16_t __imf_ushort2bfloat16_ru(
unsigned short x);
2185 extern SYCL_EXTERNAL uint16_t __imf_ushort2bfloat16_rz(
unsigned short x);
2186 extern SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_rd(
unsigned int x);
2187 extern SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_rn(
unsigned int x);
2188 extern SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_ru(
unsigned int x);
2189 extern SYCL_EXTERNAL uint16_t __imf_uint2bfloat16_rz(
unsigned int x);
2190 extern SYCL_EXTERNAL uint16_t __imf_ull2bfloat16_rd(
unsigned long long x);
2191 extern SYCL_EXTERNAL uint16_t __imf_ull2bfloat16_rn(
unsigned long long x);
2192 extern SYCL_EXTERNAL uint16_t __imf_ull2bfloat16_ru(
unsigned long long x);
2193 extern SYCL_EXTERNAL uint16_t __imf_ull2bfloat16_rz(
unsigned long long x);
2194 extern SYCL_EXTERNAL uint16_t __imf_short2bfloat16_rd(
short x);
2195 extern SYCL_EXTERNAL uint16_t __imf_short2bfloat16_rn(
short x);
2196 extern SYCL_EXTERNAL uint16_t __imf_short2bfloat16_ru(
short x);
2197 extern SYCL_EXTERNAL uint16_t __imf_short2bfloat16_rz(
short x);
2202 extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rd(
long long x);
2203 extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rn(
long long x);
2204 extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_ru(
long long x);
2205 extern SYCL_EXTERNAL uint16_t __imf_ll2bfloat16_rz(
long long x);
// bfloat16 reinterpretation helpers (declarations only). The "_as_" names
// suggest bit-pattern reinterpretation between the uint16_t-typed bfloat16
// operand and short / unsigned short — TODO confirm.
2206 extern SYCL_EXTERNAL short __imf_bfloat16_as_short(uint16_t x);
2207 extern SYCL_EXTERNAL unsigned short __imf_bfloat16_as_ushort(uint16_t x);
2208 extern SYCL_EXTERNAL uint16_t __imf_short_as_bfloat16(
short x);
2209 extern SYCL_EXTERNAL uint16_t __imf_ushort_as_bfloat16(
unsigned short x);
// bfloat16 arithmetic helpers; operands and results are all typed uint16_t.
// NOTE(review): names suggest fused multiply-add, maximum, minimum and
// copysign respectively — confirm.
2210 extern SYCL_EXTERNAL uint16_t __imf_fmabf16(uint16_t x, uint16_t y, uint16_t z);
2211 extern SYCL_EXTERNAL uint16_t __imf_fmaxbf16(uint16_t x, uint16_t y);
2212 extern SYCL_EXTERNAL uint16_t __imf_fminbf16(uint16_t x, uint16_t y);
2218 extern SYCL_EXTERNAL uint16_t __imf_copysignbf16(uint16_t x, uint16_t y);
// __imf_fma: three double operands, double result; presumably a fused
// multiply-add x * y + z — confirm. Declaration only.
2221 extern SYCL_EXTERNAL double __imf_fma(
double x,
double y,
double z);
// double <-> integer conversion entry points (declarations only).
// NOTE(review): the _rd/_rn/_ru/_rz suffixes presumably select the rounding
// mode (toward -inf / to nearest / toward +inf / toward zero) — confirm
// against the device library documentation.
// double -> unsigned int
2245 extern SYCL_EXTERNAL unsigned int __imf_double2uint_rd(
double x);
2246 extern SYCL_EXTERNAL unsigned int __imf_double2uint_rn(
double x);
2247 extern SYCL_EXTERNAL unsigned int __imf_double2uint_ru(
double x);
2248 extern SYCL_EXTERNAL unsigned int __imf_double2uint_rz(
double x);
// double -> long long
2249 extern SYCL_EXTERNAL long long int __imf_double2ll_rd(
double x);
2250 extern SYCL_EXTERNAL long long int __imf_double2ll_rn(
double x);
2251 extern SYCL_EXTERNAL long long int __imf_double2ll_ru(
double x);
2252 extern SYCL_EXTERNAL long long int __imf_double2ll_rz(
double x);
// long long -> double
2253 extern SYCL_EXTERNAL double __imf_ll2double_rd(
long long int x);
2254 extern SYCL_EXTERNAL double __imf_ll2double_rn(
long long int x);
2255 extern SYCL_EXTERNAL double __imf_ll2double_ru(
long long int x);
2256 extern SYCL_EXTERNAL double __imf_ll2double_rz(
long long int x);
// unsigned long long -> double
2257 extern SYCL_EXTERNAL double __imf_ull2double_rd(
unsigned long long int x);
2258 extern SYCL_EXTERNAL double __imf_ull2double_rn(
unsigned long long int x);
2259 extern SYCL_EXTERNAL double __imf_ull2double_ru(
unsigned long long int x);
2260 extern SYCL_EXTERNAL double __imf_ull2double_rz(
unsigned long long int x);
// double -> unsigned long long
2261 extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_rd(
double x);
2262 extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_rn(
double x);
2263 extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_ru(
double x);
2264 extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_rz(
double x);
// The "_as_" names suggest bit-pattern reinterpretation between double and
// long long rather than value conversion — TODO confirm.
2265 extern SYCL_EXTERNAL long long int __imf_double_as_longlong(
double x);
2266 extern SYCL_EXTERNAL double __imf_longlong_as_double(
long long int x);
// unsigned int -> double
2267 extern SYCL_EXTERNAL double __imf_uint2double_rd(
unsigned int x);
2268 extern SYCL_EXTERNAL double __imf_uint2double_rn(
unsigned int x);
2269 extern SYCL_EXTERNAL double __imf_uint2double_ru(
unsigned int x);
2270 extern SYCL_EXTERNAL double __imf_uint2double_rz(
unsigned int x);
// Builds a double from two int arguments named hi and lo; presumably the
// upper and lower 32 bits of the encoding — confirm.
2271 extern SYCL_EXTERNAL double __imf_hiloint2double(
int hi,
int lo);
// Unary "v" helpers (declarations only): each takes one unsigned int and
// returns unsigned int.
// NOTE(review): the 2 / 4 suffixes presumably mean the 32-bit word is treated
// as 2 x 16-bit or 4 x 8-bit lanes, and "ss" presumably means signed
// saturation (abs / neg per lane) — confirm against the device library docs.
2273 extern SYCL_EXTERNAL unsigned int __imf_vabs2(
unsigned int x);
2274 extern SYCL_EXTERNAL unsigned int __imf_vabs4(
unsigned int x);
2275 extern SYCL_EXTERNAL unsigned int __imf_vabsss2(
unsigned int x);
2276 extern SYCL_EXTERNAL unsigned int __imf_vabsss4(
unsigned int x);
2277 extern SYCL_EXTERNAL unsigned int __imf_vneg2(
unsigned int x);
2278 extern SYCL_EXTERNAL unsigned int __imf_vneg4(
unsigned int x);
2279 extern SYCL_EXTERNAL unsigned int __imf_vnegss2(
unsigned int x);
2280 extern SYCL_EXTERNAL unsigned int __imf_vnegss4(
unsigned int x);
2281 extern SYCL_EXTERNAL unsigned int __imf_vabsdiffs2(
unsigned int x,
2283 extern SYCL_EXTERNAL unsigned int __imf_vabsdiffs4(
unsigned int x,
2285 extern SYCL_EXTERNAL unsigned int __imf_vabsdiffu2(
unsigned int x,
2287 extern SYCL_EXTERNAL unsigned int __imf_vabsdiffu4(
unsigned int x,
// Binary "v" helpers (declarations only): each takes two unsigned int
// operands and returns unsigned int.
// NOTE(review): the 2 / 4 suffixes presumably mean 2 x 16-bit or 4 x 8-bit
// lanes packed in the 32-bit word; "ss" / "us" presumably mean signed /
// unsigned saturation and a trailing s / u before the lane count signed /
// unsigned lanes — confirm against the device library documentation.
// Per-lane add (plain, signed-saturating, unsigned-saturating).
2289 extern SYCL_EXTERNAL unsigned int __imf_vadd2(
unsigned int x,
unsigned int y);
2290 extern SYCL_EXTERNAL unsigned int __imf_vadd4(
unsigned int x,
unsigned int y);
2291 extern SYCL_EXTERNAL unsigned int __imf_vaddss2(
unsigned int x,
unsigned int y);
2292 extern SYCL_EXTERNAL unsigned int __imf_vaddss4(
unsigned int x,
unsigned int y);
2293 extern SYCL_EXTERNAL unsigned int __imf_vaddus2(
unsigned int x,
unsigned int y);
2294 extern SYCL_EXTERNAL unsigned int __imf_vaddus4(
unsigned int x,
unsigned int y);
// Per-lane subtract (plain, signed-saturating, unsigned-saturating).
2295 extern SYCL_EXTERNAL unsigned int __imf_vsub2(
unsigned int x,
unsigned int y);
2296 extern SYCL_EXTERNAL unsigned int __imf_vsub4(
unsigned int x,
unsigned int y);
2297 extern SYCL_EXTERNAL unsigned int __imf_vsubss2(
unsigned int x,
unsigned int y);
2298 extern SYCL_EXTERNAL unsigned int __imf_vsubss4(
unsigned int x,
unsigned int y);
2299 extern SYCL_EXTERNAL unsigned int __imf_vsubus2(
unsigned int x,
unsigned int y);
2300 extern SYCL_EXTERNAL unsigned int __imf_vsubus4(
unsigned int x,
unsigned int y);
// Per-lane average / halving add (presumably — confirm rounding behaviour).
2301 extern SYCL_EXTERNAL unsigned int __imf_vavgs2(
unsigned int x,
unsigned int y);
2302 extern SYCL_EXTERNAL unsigned int __imf_vavgs4(
unsigned int x,
unsigned int y);
2303 extern SYCL_EXTERNAL unsigned int __imf_vavgu2(
unsigned int x,
unsigned int y);
2304 extern SYCL_EXTERNAL unsigned int __imf_vavgu4(
unsigned int x,
unsigned int y);
2305 extern SYCL_EXTERNAL unsigned int __imf_vhaddu2(
unsigned int x,
unsigned int y);
2306 extern SYCL_EXTERNAL unsigned int __imf_vhaddu4(
unsigned int x,
unsigned int y);
// Per-lane equality / inequality compare; the encoding of the per-lane result
// is not visible from the declaration — TODO confirm.
2307 extern SYCL_EXTERNAL unsigned int __imf_vcmpeq2(
unsigned int x,
unsigned int y);
2308 extern SYCL_EXTERNAL unsigned int __imf_vcmpeq4(
unsigned int x,
unsigned int y);
2309 extern SYCL_EXTERNAL unsigned int __imf_vcmpne2(
unsigned int x,
unsigned int y);
2310 extern SYCL_EXTERNAL unsigned int __imf_vcmpne4(
unsigned int x,
unsigned int y);
2311 extern SYCL_EXTERNAL unsigned int __imf_vcmpges2(
unsigned int x,
2313 extern SYCL_EXTERNAL unsigned int __imf_vcmpges4(
unsigned int x,
2315 extern SYCL_EXTERNAL unsigned int __imf_vcmpgeu2(
unsigned int x,
2317 extern SYCL_EXTERNAL unsigned int __imf_vcmpgeu4(
unsigned int x,
2319 extern SYCL_EXTERNAL unsigned int __imf_vcmpgts2(
unsigned int x,
2321 extern SYCL_EXTERNAL unsigned int __imf_vcmpgts4(
unsigned int x,
2323 extern SYCL_EXTERNAL unsigned int __imf_vcmpgtu2(
unsigned int x,
2325 extern SYCL_EXTERNAL unsigned int __imf_vcmpgtu4(
unsigned int x,
2327 extern SYCL_EXTERNAL unsigned int __imf_vcmples2(
unsigned int x,
2329 extern SYCL_EXTERNAL unsigned int __imf_vcmples4(
unsigned int x,
2331 extern SYCL_EXTERNAL unsigned int __imf_vcmpleu2(
unsigned int x,
2333 extern SYCL_EXTERNAL unsigned int __imf_vcmpleu4(
unsigned int x,
2335 extern SYCL_EXTERNAL unsigned int __imf_vcmplts2(
unsigned int x,
2337 extern SYCL_EXTERNAL unsigned int __imf_vcmplts4(
unsigned int x,
2339 extern SYCL_EXTERNAL unsigned int __imf_vcmpltu2(
unsigned int x,
2341 extern SYCL_EXTERNAL unsigned int __imf_vcmpltu4(
unsigned int x,
// Binary "v" max / min / set-compare helpers (declarations only): each takes
// two unsigned int operands and returns unsigned int.
// NOTE(review): the 2 / 4 suffixes presumably mean 2 x 16-bit or 4 x 8-bit
// lanes, and the s / u before the lane count signed / unsigned lanes —
// confirm against the device library documentation.
2343 extern SYCL_EXTERNAL unsigned int __imf_vmaxs2(
unsigned int x,
unsigned int y);
2344 extern SYCL_EXTERNAL unsigned int __imf_vmaxs4(
unsigned int x,
unsigned int y);
2345 extern SYCL_EXTERNAL unsigned int __imf_vmaxu2(
unsigned int x,
unsigned int y);
2346 extern SYCL_EXTERNAL unsigned int __imf_vmaxu4(
unsigned int x,
unsigned int y);
2347 extern SYCL_EXTERNAL unsigned int __imf_vmins2(
unsigned int x,
unsigned int y);
2348 extern SYCL_EXTERNAL unsigned int __imf_vmins4(
unsigned int x,
unsigned int y);
2349 extern SYCL_EXTERNAL unsigned int __imf_vminu2(
unsigned int x,
unsigned int y);
2350 extern SYCL_EXTERNAL unsigned int __imf_vminu4(
unsigned int x,
unsigned int y);
// "vset" compare variants; how their per-lane result differs from the "vcmp"
// family is not visible from the declarations — TODO confirm.
2351 extern SYCL_EXTERNAL unsigned int __imf_vseteq2(
unsigned int x,
unsigned int y);
2352 extern SYCL_EXTERNAL unsigned int __imf_vseteq4(
unsigned int x,
unsigned int y);
2353 extern SYCL_EXTERNAL unsigned int __imf_vsetne2(
unsigned int x,
unsigned int y);
2354 extern SYCL_EXTERNAL unsigned int __imf_vsetne4(
unsigned int x,
unsigned int y);
2355 extern SYCL_EXTERNAL unsigned int __imf_vsetges2(
unsigned int x,
2357 extern SYCL_EXTERNAL unsigned int __imf_vsetges4(
unsigned int x,
2359 extern SYCL_EXTERNAL unsigned int __imf_vsetgeu2(
unsigned int x,
2361 extern SYCL_EXTERNAL unsigned int __imf_vsetgeu4(
unsigned int x,
2363 extern SYCL_EXTERNAL unsigned int __imf_vsetgts2(
unsigned int x,
2365 extern SYCL_EXTERNAL unsigned int __imf_vsetgts4(
unsigned int x,
2367 extern SYCL_EXTERNAL unsigned int __imf_vsetgtu2(
unsigned int x,
2369 extern SYCL_EXTERNAL unsigned int __imf_vsetgtu4(
unsigned int x,
2371 extern SYCL_EXTERNAL unsigned int __imf_vsetles2(
unsigned int x,
2373 extern SYCL_EXTERNAL unsigned int __imf_vsetles4(
unsigned int x,
2375 extern SYCL_EXTERNAL unsigned int __imf_vsetleu2(
unsigned int x,
2377 extern SYCL_EXTERNAL unsigned int __imf_vsetleu4(
unsigned int x,
2379 extern SYCL_EXTERNAL unsigned int __imf_vsetlts2(
unsigned int x,
2381 extern SYCL_EXTERNAL unsigned int __imf_vsetlts4(
unsigned int x,
2383 extern SYCL_EXTERNAL unsigned int __imf_vsetltu2(
unsigned int x,
2385 extern SYCL_EXTERNAL unsigned int __imf_vsetltu4(
unsigned int x,
// "vsad" helpers (declarations only): two unsigned int operands, unsigned int
// result.
// NOTE(review): presumably a per-lane sum of absolute differences over
// 2 x 16-bit / 4 x 8-bit signed (s) or unsigned (u) lanes — confirm.
2387 extern SYCL_EXTERNAL unsigned int __imf_vsads2(
unsigned int x,
unsigned int y);
2388 extern SYCL_EXTERNAL unsigned int __imf_vsads4(
unsigned int x,
unsigned int y);
2389 extern SYCL_EXTERNAL unsigned int __imf_vsadu2(
unsigned int x,
unsigned int y);
2390 extern SYCL_EXTERNAL unsigned int __imf_vsadu4(
unsigned int x,
unsigned int y);
// Prototype of __assert_fail, receiving the failed expression text, source
// file name, line number and function name. Declaration only.
// NOTE(review): this matches the hook glibc's assert() macro calls; the
// preprocessor guard selecting it is not visible in this excerpt — confirm.
2394 extern SYCL_EXTERNAL void __assert_fail(
const char *expr,
const char *file,
2395 unsigned int line,
const char *func);
// Prototypes for C99 <complex.h>-style functions on float / double complex
// values (GNU __complex__ spelling): projection (cproj), exponential (cexp)
// and the float variant of the logarithm (clogf). Declarations only; the
// definitions are not visible in this excerpt.
2410 extern SYCL_EXTERNAL float __complex__ cprojf(
float __complex__ z);
2411 extern SYCL_EXTERNAL double __complex__ cproj(
double __complex__ z);
2412 extern SYCL_EXTERNAL float __complex__ cexpf(
float __complex__ z);
2413 extern SYCL_EXTERNAL double __complex__ cexp(
double __complex__ z);
2414 extern SYCL_EXTERNAL float __complex__ clogf(
float __complex__ z);
2416 extern SYCL_EXTERNAL float __complex__ cpowf(
float __complex__ z);
2417 extern SYCL_EXTERNAL double __complex__ cpow(
double __complex__ z);
2418 extern SYCL_EXTERNAL float __complex__ csqrtf(
float __complex__ z);
2419 extern SYCL_EXTERNAL double __complex__ csqrt(
double __complex__ z);
2420 extern SYCL_EXTERNAL float __complex__ csinhf(
float __complex__ z);
2421 extern SYCL_EXTERNAL double __complex__ csinh(
double __complex__ z);
2422 extern SYCL_EXTERNAL float __complex__ ccoshf(
float __complex__ z);
2423 extern SYCL_EXTERNAL double __complex__ ccosh(
double __complex__ z);
2424 extern SYCL_EXTERNAL float __complex__ ctanhf(
float __complex__ z);
2425 extern SYCL_EXTERNAL double __complex__ ctanh(
double __complex__ z);
// csinf / csin: SYCL_EXTERNAL declarations taking one complex argument `z`
// and returning a complex result of the same precision (float, double).
// NOTE(review): presumably the complex sine as in ISO C <complex.h> —
// confirm against the implementation.
2426 extern SYCL_EXTERNAL float __complex__ csinf(
float __complex__ z);
2427 extern SYCL_EXTERNAL double __complex__ csin(
double __complex__ z);
// ccosf / ccos: SYCL_EXTERNAL declarations taking one complex argument `z`
// and returning a complex result of the same precision (float, double).
// NOTE(review): presumably the complex cosine as in ISO C <complex.h> —
// confirm against the implementation.
2428 extern SYCL_EXTERNAL float __complex__ ccosf(
float __complex__ z);
2429 extern SYCL_EXTERNAL double __complex__ ccos(
double __complex__ z);
// ctanf / ctan: SYCL_EXTERNAL declarations taking one complex argument `z`
// and returning a complex result of the same precision (float, double).
// NOTE(review): presumably the complex tangent as in ISO C <complex.h> —
// confirm against the implementation.
2430 extern SYCL_EXTERNAL float __complex__ ctanf(
float __complex__ z);
2431 extern SYCL_EXTERNAL double __complex__ ctan(
double __complex__ z);
// cacosf / cacos: SYCL_EXTERNAL declarations taking one complex argument `z`
// and returning a complex result of the same precision (float, double).
// NOTE(review): presumably the complex arc cosine as in ISO C <complex.h>
// — confirm against the implementation.
2432 extern SYCL_EXTERNAL float __complex__ cacosf(
float __complex__ z);
2433 extern SYCL_EXTERNAL double __complex__ cacos(
double __complex__ z);
// cacoshf / cacosh: SYCL_EXTERNAL declarations taking one complex argument
// `z` and returning a complex result of the same precision (float, double).
// NOTE(review): presumably the complex inverse hyperbolic cosine as in ISO C
// <complex.h> — confirm against the implementation.
2434 extern SYCL_EXTERNAL float __complex__ cacoshf(
float __complex__ z);
2435 extern SYCL_EXTERNAL double __complex__ cacosh(
double __complex__ z);
// casinf / casin: SYCL_EXTERNAL declarations taking one complex argument `z`
// and returning a complex result of the same precision (float, double).
// NOTE(review): presumably the complex arc sine as in ISO C <complex.h> —
// confirm against the implementation.
2436 extern SYCL_EXTERNAL float __complex__ casinf(
float __complex__ z);
2437 extern SYCL_EXTERNAL double __complex__ casin(
double __complex__ z);
// casinhf / casinh: SYCL_EXTERNAL declarations taking one complex argument
// `z` and returning a complex result of the same precision (float, double).
// NOTE(review): presumably the complex inverse hyperbolic sine as in ISO C
// <complex.h> — confirm against the implementation.
2438 extern SYCL_EXTERNAL float __complex__ casinhf(
float __complex__ z);
2439 extern SYCL_EXTERNAL double __complex__ casinh(
double __complex__ z);
// catanf / catan: SYCL_EXTERNAL declarations taking one complex argument `z`
// and returning a complex result of the same precision (float, double).
// NOTE(review): presumably the complex arc tangent as in ISO C <complex.h>
// — confirm against the implementation.
2440 extern SYCL_EXTERNAL float __complex__ catanf(
float __complex__ z);
2441 extern SYCL_EXTERNAL double __complex__ catan(
double __complex__ z);
// catanhf / catanh: SYCL_EXTERNAL declarations taking one complex argument
// `z` and returning a complex result of the same precision (float, double).
// NOTE(review): presumably the complex inverse hyperbolic tangent as in
// ISO C <complex.h> — confirm against the implementation.
2442 extern SYCL_EXTERNAL float __complex__ catanhf(
float __complex__ z);
2443 extern SYCL_EXTERNAL double __complex__ catanh(
double __complex__ z);
// cpolarf / cpolar: SYCL_EXTERNAL declarations taking two real arguments
// (`rho`, `theta`) and returning a complex value of the same precision
// (float, double).
// NOTE(review): these are not ISO C functions; presumably they build a
// complex number from magnitude `rho` and phase angle `theta` (like
// std::polar) — confirm against the implementation.
2444 extern SYCL_EXTERNAL float __complex__ cpolarf(
float rho,
float theta);
2445 extern SYCL_EXTERNAL double __complex__ cpolar(
double rho,
double theta);
2446 extern SYCL_EXTERNAL float __complex__ __mulsc3(
float a,
float b,
float c,
2448 extern SYCL_EXTERNAL double __complex__ __muldc3(
double a,
double b,
double c,
2450 extern SYCL_EXTERNAL float __complex__ __divsc3(
float a,
float b,
float c,
// __divdc3: double-precision complex division helper returning
// `double __complex__`. Its operands are declared as `double`, matching the
// sibling __muldc3 declaration and the libgcc/compiler-rt routine of the
// same name. They were previously declared as `float`, which contradicted
// the double-precision return type and would narrow every argument at the
// call site.
// NOTE(review): the trailing parameter is not visible in this excerpt; it
// must be `double` as well.
2452 extern SYCL_EXTERNAL double __complex__ __divdc3(
double a,
double b,
double c,
2455 #elif defined(_WIN32)
// _Exp: SYCL_EXTERNAL declaration (inside the `_WIN32` branch) taking a
// pointer to double `px`, a double `y` and a short `eoff`, and returning a
// short.
// NOTE(review): looks like the MSVC runtime's internal exponential helper
// for double; the meaning of px/y/eoff and of the return code is not
// visible here — confirm against the MSVC CRT sources.
2468 extern SYCL_EXTERNAL short _Exp(
double *px,
double y,
short eoff);
// _FExp: SYCL_EXTERNAL declaration (inside the `_WIN32` branch) taking a
// pointer to float `px`, a float `y` and a short `eoff`, and returning a
// short. Single-precision counterpart of the `_Exp` prototype.
// NOTE(review): looks like the MSVC runtime's internal exponential helper
// for float; the meaning of px/y/eoff and of the return code is not
// visible here — confirm against the MSVC CRT sources.
2474 extern SYCL_EXTERNAL short _FExp(
float *px,
float y,
short eoff);
2478 extern SYCL_EXTERNAL void _wassert(
const wchar_t *wexpr,
const wchar_t *wfile,
#define __SYCL_HALF_PRECISION_MATH_FUNCTION_2_OVERLOAD(NAME)
#define __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(NAME)
#define __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(NAME)
#define __SYCL_MATH_FUNCTION_3_OVERLOAD(NAME)
#define __SYCL_NATIVE_MATH_FUNCTION_OVERLOAD(NAME)
#define __SYCL_MATH_FUNCTION_OVERLOAD_FM(NAME)
#define __FAST_MATH_SGENFLOAT(T)
#define __SYCL_NATIVE_MATH_FUNCTION_2_OVERLOAD(NAME)
#define __FAST_MATH_GENFLOAT(T)
#define __SYCL_MATH_FUNCTION_2_OVERLOAD(NAME)
#define __SYCL_MATH_FUNCTION_OVERLOAD(NAME)
Provides a cross-platform math array class template that works on SYCL devices as well as in host C++...
Provides a cross-platform vector class template that works efficiently on SYCL devices as well as in h...
#define __SYCL_INLINE_VER_NAMESPACE(X)
#define __SYCL_DEPRECATED(message)
#define __SYCL2020_DEPRECATED(message)
#define __SYCL_ALWAYS_INLINE
__ESIMD_API sycl::ext::intel::esimd::simd< RT, SZ > trunc(const sycl::ext::intel::esimd::simd< float, SZ > &src0, Sat sat={})
Round to integral value using the round to zero rounding mode (vector version).
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > floor(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Floor" operation, vector version - alias of rndd.
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > ceil(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Ceiling" operation, vector version - alias of rndu.
__ESIMD_API simd< T, N > sqrt(simd< T, N > src, Sat sat={})
Square root.
__ESIMD_API simd< T, N > rsqrt(simd< T, N > src, Sat sat={})
Square root reciprocal - calculates 1/sqrt(x).
__ESIMD_API simd< T, N > pow(simd< T, N > src0, simd< U, N > src1, Sat sat={})
Power - calculates src0 raised to the power of src1.
__ESIMD_API simd< T, N > log2(simd< T, N > src, Sat sat={})
Logarithm base 2.
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > div(sycl::ext::intel::esimd::simd< T, SZ > &remainder, sycl::ext::intel::esimd::simd< T, SZ > src0, U src1)
Integral division with a vector dividend and a scalar divisor.
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > asin(sycl::ext::intel::esimd::simd< T, SZ > src0)
__ESIMD_API sycl::ext::intel::esimd::simd< float, SZ > sincos(sycl::ext::intel::esimd::simd< float, SZ > &dstcos, U src0, Sat sat={})
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > acos(sycl::ext::intel::esimd::simd< T, SZ > src0)
ESIMD_DETAIL ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< T, SZ > atan(sycl::ext::intel::esimd::simd< T, SZ > src0)
sycl::ext::intel::esimd::simd< float, N > fmod(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T >::value &&std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > line(sycl::ext::intel::esimd::simd< T, 4 > src0, sycl::ext::intel::esimd::simd< T, SZ > src1, Sat sat={})
Linear equation.
sycl::ext::intel::esimd::simd< float, N > atan2(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
ESIMD_DETAIL __ESIMD_API std::enable_if_t< !std::is_same< std::remove_const_t< TRes >, std::remove_const_t< TArg > >::value, simd< TRes, SZ > > abs(simd< TArg, SZ > src0)
Get absolute value (vector version)
float __imf_truncf(float)
float __imf_saturatef(float)
float __imf_rsqrtf(float)
double __imf_rint(double)
double __imf_ceil(double)
double __imf_sqrt(double)
double __imf_trunc(double)
_iml_half_internal __imf_rintf16(_iml_half_internal)
double __imf_rsqrt(double)
float __imf_copysignf(float, float)
_iml_half_internal __imf_floorf16(_iml_half_internal)
double __imf_floor(double)
_iml_half_internal __imf_sqrtf16(_iml_half_internal)
_iml_half_internal __imf_truncf16(_iml_half_internal)
float __imf_floorf(float)
double __imf_copysign(double, double)
_iml_half_internal __imf_copysignf16(_iml_half_internal, _iml_half_internal)
_iml_half_internal __imf_rsqrtf16(_iml_half_internal)
_iml_half_internal __imf_ceilf16(_iml_half_internal)
__SYCL_EXTERN_STREAM_ATTRS ostream clog
Linked to standard error (buffered)
is_contained< T, gtl::integer_list > is_geninteger
vec< T, 2 > to_vec2(marray< T, N > x, size_t start)
constexpr bool msbIsSet(const T x)
void memcpy(void *Dst, const void *Src, std::size_t Size)
typename std::enable_if< B, T >::type enable_if_t
std::enable_if_t< std::is_same_v< Tp, float >, float > copysign(Tp x, Tp y)
std::enable_if_t< std::is_same_v< Tp, float >, float > rint(Tp x)
sycl::half hadd(sycl::half x, sycl::half y)
__SYCL_ALWAYS_INLINE std::enable_if_t< std::is_same_v< T, half >||std::is_same_v< T, float >, sycl::marray< T, N > > tanh(sycl::marray< T, N > x) __NOEXC
__SYCL_ALWAYS_INLINE sycl::marray< half, N > exp2(sycl::marray< half, N > x) __NOEXC
std::enable_if_t< std::is_same< T, bfloat16 >::value, bool > isnan(T x)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fma(T x, T y, T z)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmax(T x, T y)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fmin(T x, T y)
std::enable_if_t< detail::is_bf16_storage_type< T >::value, T > fabs(T x)
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > log(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > sin(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > cos(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > exp(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC
__SYCL_ALWAYS_INLINE std::enable_if_t< __FAST_MATH_SGENFLOAT(T), marray< T, N > > powr(marray< T, N > x, marray< T, N > y) __NOEXC
sycl::detail::half_impl::half half
Error handling, matching OpenCL plugin semantics.
simd< _Tp, _Abi > min(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
int popcount(const simd_mask< _Tp, _Abi > &) noexcept
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
simd< _Tp, _Abi > clamp(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &)