DPC++ Runtime
Runtime libraries for oneAPI DPC++
math.hpp
Go to the documentation of this file.
1 //==-------------- math.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement Explicit SIMD math APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
21 
22 #include <cstdint>
23 
24 namespace sycl {
25 inline namespace _V1 {
26 namespace ext::intel::esimd {
27 
40 
43 
68 template <typename T0, typename T1, int SZ>
69 __ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v<T0> ||
70  std::is_same_v<T1, T0>,
73  if constexpr (detail::is_generic_floating_point_v<T0>)
74  return __esimd_sat<T0, T1, SZ>(src.data());
75  else if constexpr (detail::is_generic_floating_point_v<T1>) {
76  if constexpr (std::is_unsigned_v<T0>)
77  return __esimd_fptoui_sat<T0, T1, SZ>(src.data());
78  else
79  return __esimd_fptosi_sat<T0, T1, SZ>(src.data());
80  } else if constexpr (std::is_unsigned_v<T0>) {
81  if constexpr (std::is_unsigned_v<T1>)
82  return __esimd_uutrunc_sat<T0, T1, SZ>(src.data());
83  else
84  return __esimd_ustrunc_sat<T0, T1, SZ>(src.data());
85  } else {
86  if constexpr (std::is_signed_v<T1>)
87  return __esimd_sstrunc_sat<T0, T1, SZ>(src.data());
88  else
89  return __esimd_sutrunc_sat<T0, T1, SZ>(src.data());
90  }
91 }
92 
94 // abs
95 namespace detail {
96 
97 template <typename TRes, typename TArg, int SZ>
98 ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
99 __esimd_abs_common_internal(simd<TArg, SZ> src0) {
100  simd<TArg, SZ> Result;
101  if constexpr (detail::is_generic_floating_point_v<TArg>)
102  Result = simd<TArg, SZ>(__spirv_ocl_fabs<TArg, SZ>(src0.data()));
103  else
104  Result = simd<TArg, SZ>(__spirv_ocl_s_abs<TArg, SZ>(src0.data()));
105  return convert<TRes>(Result);
106 }
107 
108 template <typename TRes, typename TArg>
109 
110 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<TRes>::value &&
111  detail::is_esimd_scalar<TArg>::value,
112  TRes>
113 __esimd_abs_common_internal(TArg src0) {
114  simd<TArg, 1> Src0 = src0;
115  simd<TArg, 1> Result = __esimd_abs_common_internal<TArg>(Src0);
116  return convert<TRes>(Result)[0];
117 }
118 } // namespace detail
120 
127 template <typename TRes, typename TArg, int SZ>
128 __ESIMD_API std::enable_if_t<
129  !std::is_same<std::remove_const_t<TRes>, std::remove_const_t<TArg>>::value,
132  return detail::__esimd_abs_common_internal<TRes, TArg, SZ>(src0.data());
133 }
134 
140 template <typename TRes, typename TArg>
141 __ESIMD_API std::enable_if_t<!std::is_same<std::remove_const_t<TRes>,
142  std::remove_const_t<TArg>>::value &&
143  detail::is_esimd_scalar<TRes>::value &&
144  detail::is_esimd_scalar<TArg>::value,
145  std::remove_const_t<TRes>>
146 abs(TArg src0) {
147  return detail::__esimd_abs_common_internal<TRes, TArg>(src0);
148 }
149 
157 template <typename T1, int SZ> __ESIMD_API simd<T1, SZ> abs(simd<T1, SZ> src0) {
158  return detail::__esimd_abs_common_internal<T1, T1, SZ>(src0.data());
159 }
160 
167 template <typename T1>
168 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T1>::value,
169  std::remove_const_t<T1>>
170 abs(T1 src0) {
171  return detail::__esimd_abs_common_internal<T1, T1>(src0);
172 }
173 
183 template <typename T, int SZ, class Sat = saturation_off_tag>
185  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
186 
187  if constexpr (std::is_floating_point<T>::value) {
188  auto Result = __spirv_ocl_fmax<T, SZ>(src0.data(), src1.data());
189  if constexpr (is_sat)
190  Result = __esimd_sat<T, T, SZ>(Result);
191  return simd<T, SZ>(Result);
192  } else if constexpr (std::is_unsigned<T>::value) {
193  auto Result = __esimd_umax<T, SZ>(src0.data(), src1.data());
194  if constexpr (is_sat)
195  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
196  return simd<T, SZ>(Result);
197  } else {
198  auto Result = __esimd_smax<T, SZ>(src0.data(), src1.data());
199  if constexpr (is_sat)
200  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
201  return simd<T, SZ>(Result);
202  }
203 }
204 
215 template <typename T, int SZ, class Sat = saturation_off_tag>
216 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
217  max)(simd<T, SZ> src0, T src1, Sat sat = {}) {
218  simd<T, SZ> Src1 = src1;
219  simd<T, SZ> Result = (esimd::max)(src0, Src1, sat);
220  return Result;
221 }
222 
233 template <typename T, int SZ, class Sat = saturation_off_tag>
234 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
235  max)(T src0, simd<T, SZ> src1, Sat sat = {}) {
236  simd<T, SZ> Src0 = src0;
237  simd<T, SZ> Result = (esimd::max)(Src0, src1, sat);
238  return Result;
239 }
240 
249 template <typename T, class Sat = saturation_off_tag>
250 ESIMD_NODEBUG ESIMD_INLINE
251 std::enable_if_t<detail::is_esimd_scalar<T>::value, T>(max)(T src0, T src1,
252  Sat sat = {}) {
253  simd<T, 1> Src0 = src0;
254  simd<T, 1> Src1 = src1;
255  simd<T, 1> Result = (esimd::max)(Src0, Src1, sat);
256  return Result[0];
257 }
258 
268 template <typename T, int SZ, class Sat = saturation_off_tag>
270  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
271 
272  if constexpr (std::is_floating_point<T>::value) {
273  auto Result = __spirv_ocl_fmin<T, SZ>(src0.data(), src1.data());
274  if constexpr (is_sat)
275  Result = __esimd_sat<T, T, SZ>(Result);
276  return simd<T, SZ>(Result);
277  } else if constexpr (std::is_unsigned<T>::value) {
278  auto Result = __esimd_umin<T, SZ>(src0.data(), src1.data());
279  if constexpr (is_sat)
280  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
281  return simd<T, SZ>(Result);
282  } else {
283  auto Result = __esimd_smin<T, SZ>(src0.data(), src1.data());
284  if constexpr (is_sat)
285  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
286  return simd<T, SZ>(Result);
287  }
288 }
289 
300 template <typename T, int SZ, class Sat = saturation_off_tag>
301 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
302  min)(simd<T, SZ> src0, T src1, Sat sat = {}) {
303  simd<T, SZ> Src1 = src1;
304  simd<T, SZ> Result = (esimd::min)(src0, Src1, sat);
305  return Result;
306 }
307 
318 template <typename T, int SZ, class Sat = saturation_off_tag>
319 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
320  min)(T src0, simd<T, SZ> src1, Sat sat = {}) {
321  simd<T, SZ> Src0 = src0;
322  simd<T, SZ> Result = (esimd::min)(Src0, src1, sat);
323  return Result;
324 }
325 
334 template <typename T, class Sat = saturation_off_tag>
335 ESIMD_NODEBUG ESIMD_INLINE
336 std::enable_if_t<detail::is_esimd_scalar<T>::value, T>(min)(T src0, T src1,
337  Sat sat = {}) {
338  simd<T, 1> Src0 = src0;
339  simd<T, 1> Src1 = src1;
340  simd<T, 1> Result = (esimd::min)(Src0, Src1, sat);
341  return Result[0];
342 }
343 
345 
348 
349 #if defined(__SYCL_DEVICE_ONLY__)
350 #define __ESIMD_VECTOR_IMPL(T, name, iname) \
351  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res = \
352  __spirv_ocl_native_##iname<__ESIMD_DNS::__raw_t<T>, N>(src.data()); \
353  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
354  return res; \
355  else \
356  return esimd::saturate<T>(simd<T, N>(res));
357 #define __ESIMD_SCALAR_IMPL(T, name, iname) \
358  __ESIMD_DNS::__raw_t<T> res = \
359  __spirv_ocl_native_##iname<__ESIMD_DNS::__raw_t<T>>(src); \
360  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
361  return res; \
362  else \
363  return esimd::saturate<T>(simd<T, 1>(res))[0];
364 #else
365 #define __ESIMD_VECTOR_IMPL(T, name, iname) return 0;
366 #define __ESIMD_SCALAR_IMPL(T, name, iname) return 0;
367 #endif // __SYCL_DEVICE_ONLY__
368 
369 #define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname) \
370  \
371  template <class T, int N, class Sat = saturation_off_tag, \
372  class = std::enable_if_t<COND>> \
373  __ESIMD_API simd<T, N> name(simd<T, N> src, Sat sat = {}) { \
374  __ESIMD_VECTOR_IMPL(T, name, iname) \
375  } \
376  \
377  \
378  template <typename T, class Sat = saturation_off_tag, \
379  class = std::enable_if_t<COND>> \
380  __ESIMD_API T name(T src, Sat sat = {}) { \
381  __ESIMD_SCALAR_IMPL(T, name, iname) \
382  }
383 
384 #define __ESIMD_EMATH_IEEE_COND \
385  detail::is_generic_floating_point_v<T> && (sizeof(T) >= 4)
386 
387 #define __ESIMD_EMATH_SPIRV_COND \
388  std::is_same_v<T, float> || std::is_same_v<T, sycl::half>
389 
392 __ESIMD_UNARY_INTRINSIC_DEF(detail::is_generic_floating_point_v<T>, inv, recip)
393 
399 
403 
404 __ESIMD_UNARY_INTRINSIC_DEF(detail::is_generic_floating_point_v<T>, sqrt, sqrt)
407 
408 template <class T, int N, class Sat = saturation_off_tag,
410  class = std::enable_if_t<__ESIMD_EMATH_IEEE_COND>>
411 __ESIMD_API simd<T, N> sqrt_ieee(simd<T, N> src, Sat sat = {}) {
412  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res =
413  __esimd_ieee_sqrt<T, N>(src.data());
414  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
415  return res;
416  else
417  return esimd::saturate<T>(simd<T, N>(res));
418 }
419 
421 template <typename T, class Sat = saturation_off_tag,
422  class = std::enable_if_t<__ESIMD_EMATH_IEEE_COND>>
423 __ESIMD_API T sqrt_ieee(T src, Sat sat = {}) {
424  simd<T, 1> src_vec = src;
425  simd<T, 1> res = sqrt_ieee<T, 1>(src_vec, sat);
426  return res[0];
427 }
428 
433 
437 
441 
442 template <class T, int N, class Sat = saturation_off_tag>
446 __ESIMD_API std::enable_if_t<std::is_same_v<T, double>, simd<double, N>>
447 rsqrt(simd<T, N> src, Sat sat = {}) {
448  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
449  return inv(sqrt(src));
450  else
451  return esimd::saturate<double>(inv(sqrt(src)));
452 }
453 
455 template <class T, class Sat = saturation_off_tag>
456 __ESIMD_API std::enable_if_t<std::is_same_v<T, double>, double>
457 rsqrt(T src, Sat sat = {}) {
458  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
459  return inv(sqrt(src));
460  else
461  return esimd::saturate<double>(inv(sqrt(src)));
462 }
463 
464 #undef __ESIMD_UNARY_INTRINSIC_DEF
465 #undef __ESIMD_VECTOR_IMPL
466 #undef __ESIMD_SCALAR_IMPL
467 
468 #define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname) \
469  \
470  template <class T, int N, class U, class Sat = saturation_off_tag, \
471  class = std::enable_if_t<COND>> \
472  __ESIMD_API simd<T, N> name(simd<T, N> src0, simd<U, N> src1, \
473  Sat sat = {}) { \
474  using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>; \
475  RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data()); \
476  RawVecT res_raw = __esimd_##iname<T, N>(src0.data(), src1_raw_conv); \
477  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
478  return res_raw; \
479  else \
480  return esimd::saturate<T>(simd<T, N>(res_raw)); \
481  } \
482  \
483  \
484  template <class T, int N, class U, class Sat = saturation_off_tag, \
485  class = std::enable_if_t<COND>> \
486  __ESIMD_API simd<T, N> name(simd<T, N> src0, U src1, Sat sat = {}) { \
487  return name<T, N, U>(src0, simd<U, N>(src1), sat); \
488  } \
489  \
490  \
491  template <class T, class U, class Sat = saturation_off_tag, \
492  class = std::enable_if_t<COND>> \
493  __ESIMD_API T name(T src0, U src1, Sat sat = {}) { \
494  simd<T, 1> res = name<T, 1, U>(simd<T, 1>(src0), simd<U, 1>(src1), sat); \
495  return res[0]; \
496  }
497 
500 template <class T, int N, class U, class Sat = saturation_off_tag,
501  class = std::enable_if_t<__ESIMD_EMATH_SPIRV_COND>>
503 #if defined(__SYCL_DEVICE_ONLY__)
504  using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>;
505  RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data());
506  RawVecT res_raw = __spirv_ocl_native_powr<__ESIMD_DNS::__raw_t<T>, N>(
507  src0.data(), src1_raw_conv);
508  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
509  return res_raw;
510  else
511  return esimd::saturate<T>(simd<T, N>(res_raw));
512 #else
513  return 0;
514 #endif // __SYCL_DEVICE_ONLY__
515 }
516 
518 template <class T, int N, class U, class Sat = saturation_off_tag,
519  class = std::enable_if_t<__ESIMD_EMATH_SPIRV_COND>>
520 __ESIMD_API simd<T, N> pow(simd<T, N> src0, U src1, Sat sat = {}) {
521  return pow<T, N, U>(src0, simd<U, N>(src1), sat);
522 }
523 
525 template <class T, class U, class Sat = saturation_off_tag,
526  class = std::enable_if_t<__ESIMD_EMATH_SPIRV_COND>>
527 __ESIMD_API T pow(T src0, U src1, Sat sat = {}) {
528 #if defined(__SYCL_DEVICE_ONLY__)
529  using ResT = __ESIMD_DNS::__raw_t<T>;
530  ResT src1_raw_conv = detail::convert_scalar<T, U>(src1);
531  ResT res_raw =
532  __spirv_ocl_native_powr<__ESIMD_DNS::__raw_t<T>>(src0, src1_raw_conv);
533  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
534  return res_raw;
535  else
536  return esimd::saturate<T>(simd<T, 1>(res_raw))[0];
537 #else
538  return 0;
539 #endif // __SYCL_DEVICE_ONLY__
540 }
541 
544 
545 #undef __ESIMD_BINARY_INTRINSIC_DEF
546 #undef __ESIMD_EMATH_IEEE_COND
547 #undef __ESIMD_EMATH_SPIRV_COND
548 
550 
553 
555 namespace detail {
556 // std::numbers::ln2_v<float> in c++20
557 constexpr float ln2 = 0.69314718f;
558 // std::numbers::log2e_v<float> in c++20
559 constexpr float log2e = 1.442695f;
560 } // namespace detail
562 
567 template <class T, int SZ, class Sat = saturation_off_tag>
568 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> log(simd<T, SZ> src0, Sat sat = {}) {
569  using CppT = __ESIMD_DNS::__cpp_t<T>;
570  simd<T, SZ> Result =
571  esimd::log2<T, SZ, saturation_off_tag>(src0) * detail::ln2;
572 
573  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
574  return Result;
575  else
576  return esimd::saturate<T>(Result);
577 }
578 
579 template <class T, class Sat = saturation_off_tag>
580 ESIMD_NODEBUG ESIMD_INLINE T log(T src0, Sat sat = {}) {
581  return esimd::log<T, 1>(src0, sat)[0];
582 }
583 
588 template <class T, int SZ, class Sat = saturation_off_tag>
589 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> exp(simd<T, SZ> src0, Sat sat = {}) {
590  using CppT = __ESIMD_DNS::__cpp_t<T>;
591  return esimd::exp2<T, SZ>(src0 * detail::log2e, sat);
592 }
593 
594 template <class T, class Sat = saturation_off_tag>
595 ESIMD_NODEBUG ESIMD_INLINE T exp(T src0, Sat sat = {}) {
596  return esimd::exp<T, 1>(src0, sat)[0];
597 }
598 
600 
603 
605 // Rounding intrinsics.
607 
608 #define __ESIMD_INTRINSIC_DEF(name) \
609  \
610  \
611  \
613  \
614  \
615  \
616  template <typename T, int SZ, class Sat = __ESIMD_NS::saturation_off_tag> \
617  __ESIMD_API __ESIMD_NS::simd<T, SZ> name(__ESIMD_NS::simd<float, SZ> src0, \
618  Sat sat = {}) { \
619  __ESIMD_NS::simd<float, SZ> Result = __esimd_##name<SZ>(src0.data()); \
620  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>) \
621  return Result; \
622  else if constexpr (!std::is_same_v<float, T>) { \
623  auto RawRes = __ESIMD_NS::saturate<float>(Result).data(); \
624  return __ESIMD_DNS::convert_vector<T, float, SZ>(std::move(RawRes)); \
625  } else { \
626  return __ESIMD_NS::saturate<T>(Result); \
627  } \
628  } \
629  \
630  template <typename T, class Sat = __ESIMD_NS::saturation_off_tag> \
631  __ESIMD_API T name(float src0, Sat sat = {}) { \
632  __ESIMD_NS::simd<float, 1> Src0 = src0; \
633  __ESIMD_NS::simd<T, 1> Result = name<T>(Src0, sat); \
634  return Result[0]; \
635  }
636 
645 
654 
662 
670 
671 #undef __ESIMD_INTRINSIC_DEF
673 
676 
678 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
679 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
680 floor(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
681  return esimd::rndd<RT, SZ>(src0, sat);
682 }
683 
685 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
686 ESIMD_INLINE RT floor(float src0, Sat sat = {}) {
687  return esimd::rndd<RT, 1U>(src0, sat)[0];
688 }
689 
691 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
692 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
693 ceil(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
694  return esimd::rndu<RT, SZ>(src0, sat);
695 }
696 
698 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
699 ESIMD_INLINE RT ceil(float src0, Sat sat = {}) {
700  return esimd::rndu<RT, 1U>(src0, sat);
701 }
702 
711 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
712 __ESIMD_API __ESIMD_NS::simd<RT, SZ>
713 trunc(const __ESIMD_NS::simd<float, SZ> &src0, Sat sat = {}) {
714  return esimd::rndz<RT, SZ>(src0, sat);
715 }
716 
724 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
725 __ESIMD_API RT trunc(float src0, Sat sat = {}) {
726  return esimd::rndz<RT, 1U>(src0, sat)[0];
727 }
728 
730 
733 
742 template <int N>
743 ESIMD_NODEBUG
744  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), uint>
746  return __esimd_pack_mask<N>(src0.data());
747 }
748 
756 template <int N>
757 ESIMD_NODEBUG
758  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), simd_mask<N>>
760  return __esimd_unpack_mask<N>(src0);
761 }
762 
765 template <int N>
766 __ESIMD_API std::enable_if_t<(N != 8 && N != 16 && N < 32), uint>
768  simd_mask<(N < 8 ? 8 : N < 16 ? 16 : 32)> src_0 = 0;
769  src_0.template select<N, 1>() = src0.template bit_cast_view<ushort>();
770  return esimd::pack_mask(src_0);
771 }
772 
779 template <typename T, int N>
780 __ESIMD_API
781  std::enable_if_t<(std::is_same_v<T, ushort> || std::is_same_v<T, uint>) &&
782  (N > 0 && N <= 32),
784  ballot(simd<T, N> mask) {
785  simd_mask<N> cmp = (mask != 0);
786  if constexpr (N == 8 || N == 16 || N == 32) {
787  return __esimd_pack_mask<N>(cmp.data());
788  } else {
789  constexpr int N1 = (N <= 8 ? 8 : N <= 16 ? 16 : 32);
790  simd<uint16_t, N1> res = 0;
791  res.template select<N, 1>() = cmp.data();
792  return __esimd_pack_mask<N1>(res.data());
793  }
794 }
795 
800 template <typename T, int N>
801 ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
802  std::is_integral<T>::value && (sizeof(T) <= 4), simd<uint32_t, N>>
803 cbit(simd<T, N> src) {
804  return __esimd_cbit<T, N>(src.data());
805 }
806 
809 template <typename T>
810 __ESIMD_API
811  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) <= 4), uint32_t>
812  cbit(T src) {
813  simd<T, 1> Src = src;
814  simd<uint32_t, 1> Result = esimd::cbit(Src);
815  return Result[0];
816 }
817 
822 template <typename BaseTy, typename RegionTy>
823 __ESIMD_API std::enable_if_t<
824  std::is_integral<
826  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) <= 4) &&
828  uint32_t>
830  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
831  simd<Ty, 1> Src = src;
832  simd<uint32_t, 1> Result = esimd::cbit(Src);
833  return Result[0];
834 }
835 
843 template <typename T, int N>
844 __ESIMD_API
845  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), simd<T, N>>
846  fbl(simd<T, N> src) {
847  return __esimd_fbl<T, N>(src.data());
848 }
849 
852 template <typename T>
853 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
854 fbl(T src) {
855  simd<T, 1> Src = src;
856  simd<T, 1> Result = esimd::fbl(Src);
857  return Result[0];
858 }
859 
865 template <typename BaseTy, typename RegionTy>
866 __ESIMD_API std::enable_if_t<
867  std::is_integral<
869  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
873  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
874  simd<Ty, 1> Src = src;
875  simd<Ty, 1> Result = esimd::fbl(Src);
876  return Result[0];
877 }
878 
886 template <typename T, int N>
887 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
888  std::is_signed<T>::value && (sizeof(T) == 4),
890 fbh(simd<T, N> src) {
891  return __esimd_sfbh<T, N>(src.data());
892 }
893 
900 template <typename T, int N>
901 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
902  !std::is_signed<T>::value && (sizeof(T) == 4),
904 fbh(simd<T, N> src) {
905  return __esimd_ufbh<T, N>(src.data());
906 }
907 
910 template <typename T>
911 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
912 fbh(T src) {
913  simd<T, 1> Src = src;
914  simd<T, 1> Result = esimd::fbh(Src);
915  return Result[0];
916 }
917 
923 template <typename BaseTy, typename RegionTy>
924 __ESIMD_API std::enable_if_t<
925  std::is_integral<
927  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
931  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
932  simd<Ty, 1> Src = src;
933  simd<Ty, 1> Result = esimd::fbh(Src);
934  return Result[0];
935 }
936 
947 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
948 __ESIMD_API
949  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
952  using ComputationTy =
953  __ESIMD_DNS::computation_type_t<decltype(src0), int32_t>;
954  ComputationTy Src0 = src0;
955  ComputationTy Src1 = src1;
956 
957  if constexpr (std::is_same_v<Sat, saturation_on_tag>) {
958  if constexpr (std::is_unsigned<T0>::value) {
959  if constexpr (std::is_unsigned<
960  typename ComputationTy::element_type>::value)
961  return __esimd_uushl_sat<T0, typename ComputationTy::element_type, SZ>(
962  Src0.data(), Src1.data());
963  else
964  return __esimd_usshl_sat<T0, typename ComputationTy::element_type, SZ>(
965  Src0.data(), Src1.data());
966  } else {
967  if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
968  return __esimd_sushl_sat<T0, typename ComputationTy::element_type, SZ>(
969  Src0.data(), Src1.data());
970  else
971  return __esimd_ssshl_sat<T0, typename ComputationTy::element_type, SZ>(
972  Src0.data(), Src1.data());
973  }
974  } else {
975  if constexpr (std::is_unsigned<T0>::value) {
976  if constexpr (std::is_unsigned<
977  typename ComputationTy::element_type>::value)
978  return __esimd_uushl<T0, typename ComputationTy::element_type, SZ>(
979  Src0.data(), Src1.data());
980  else
981  return __esimd_usshl<T0, typename ComputationTy::element_type, SZ>(
982  Src0.data(), Src1.data());
983  } else {
984  if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
985  return __esimd_sushl<T0, typename ComputationTy::element_type, SZ>(
986  Src0.data(), Src1.data());
987  else
988  return __esimd_ssshl<T0, typename ComputationTy::element_type, SZ>(
989  Src0.data(), Src1.data());
990  }
991  }
992 }
993 
1003 template <typename T0, typename T1, int SZ, typename U,
1004  class Sat = saturation_off_tag>
1005 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1006  std::is_integral<T1>::value &&
1007  std::is_integral<U>::value,
1009 shl(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1010  simd<U, SZ> Src1 = src1;
1011  return shl<T0, T1, SZ>(src0, Src1, sat);
1012 }
1013 
1023 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1024 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1025  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1026  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1027  std::is_integral<T0>::value &&
1028  std::is_integral<T1>::value &&
1029  std::is_integral<T2>::value,
1030  std::remove_const_t<T0>>
1031 shl(T1 src0, T2 src1, Sat sat = {}) {
1032  simd<T1, 1> Src0 = src0;
1033  simd<T0, 1> Result = shl<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1034  return Result[0];
1035 }
1036 
1047 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
1048 __ESIMD_API
1049  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
1052  using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
1053  typedef typename std::make_unsigned<IntermedTy>::type ComputationTy;
1056  simd<ComputationTy, SZ> Result = Src0.data() >> Src1.data();
1057 
1058  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
1059  return Result;
1060  else
1061  return saturate<T0>(Result);
1062 }
1063 
1074 template <typename T0, typename T1, int SZ, typename U,
1075  class Sat = saturation_off_tag>
1076 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1077  std::is_integral<T1>::value &&
1078  std::is_integral<U>::value,
1080 lsr(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1081  simd<T1, SZ> Src1 = src1;
1082  return lsr<T0, T1, SZ>(src0, Src1, sat);
1083 }
1084 
1095 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1096 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1097  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1098  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1099  std::is_integral<T0>::value &&
1100  std::is_integral<T1>::value &&
1101  std::is_integral<T2>::value,
1102  std::remove_const_t<T0>>
1103 lsr(T1 src0, T2 src1, Sat sat = {}) {
1104  simd<T1, 1> Src0 = src0;
1105  simd<T0, 1> Result = lsr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1106 
1107  return Result[0];
1108 }
1109 
1120 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
1121 __ESIMD_API
1122  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
1125  using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
1126  typedef typename std::make_signed<IntermedTy>::type ComputationTy;
1129  simd<ComputationTy, SZ> Result = Src0 >> Src1;
1130  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
1131  return Result;
1132  else
1133  return saturate<T0>(Result);
1134 }
1135 
1146 template <typename T0, typename T1, int SZ, typename U,
1147  class Sat = saturation_off_tag>
1148 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1149  std::is_integral<T1>::value &&
1150  std::is_integral<U>::value,
1152 asr(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1153  simd<U, SZ> Src1 = src1;
1154  return asr<T0, T1, SZ>(src0, Src1, sat);
1155 }
1156 
1167 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1168 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1169  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1170  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1171  std::is_integral<T0>::value &&
1172  std::is_integral<T1>::value &&
1173  std::is_integral<T2>::value,
1174  std::remove_const_t<T0>>
1175 asr(T1 src0, T2 src1, Sat sat = {}) {
1176  simd<T1, 1> Src0 = src0;
1177  simd<T0, 1> Result = esimd::asr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1178  return Result[0];
1179 }
1180 
1191 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
1192 __ESIMD_API
1193  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
1196  if constexpr (std::is_unsigned<T1>::value) {
1197  return esimd::lsr<T0, T1, SZ>(src0, src1, sat);
1198  } else {
1199  return esimd::asr<T0, T1, SZ>(src0, src1, sat);
1200  }
1201 }
1202 
1213 template <typename T0, typename T1, int SZ, typename U,
1214  class Sat = saturation_off_tag>
1215 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1216  std::is_integral<T1>::value &&
1217  std::is_integral<U>::value,
1219 shr(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1220  simd<U, SZ> Src1 = src1;
1221  return shr<T0, T1, SZ>(src0, Src1, sat);
1222 }
1223 
1233 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1234 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1235  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1236  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1237  std::is_integral<T0>::value &&
1238  std::is_integral<T1>::value &&
1239  std::is_integral<T2>::value,
1240  std::remove_const_t<T0>>
1241 shr(T1 src0, T2 src1, Sat sat = {}) {
1242  simd<T1, 1> Src0 = src0;
1243  simd<T0, 1> Result = shr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1244  return Result[0];
1245 }
1246 
1255 template <typename T0, typename T1, int SZ>
1256 __ESIMD_API std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t,
1257  uint32_t, int64_t, uint64_t>() &&
1258  detail::is_type<T1, int16_t, uint16_t, int32_t,
1259  uint32_t, int64_t, uint64_t>(),
1262  return __esimd_rol<T0, T1, SZ>(src0.data(), src1.data());
1263 }
1264 
1273 template <typename T0, typename T1, int SZ, typename U>
1274 __ESIMD_API
1275  std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
1276  int64_t, uint64_t>() &&
1277  detail::is_type<T1, int16_t, uint16_t, int32_t,
1278  uint32_t, int64_t, uint64_t>() &&
1279  detail::is_type<U, int16_t, uint16_t, int32_t,
1280  uint32_t, int64_t, uint64_t>(),
1282  rol(simd<T1, SZ> src0, U src1) {
1283  simd<T1, SZ> Src1 = src1;
1284  return rol<T0>(src0, Src1);
1285 }
1286 
1294 template <typename T0, typename T1, typename T2>
1295 __ESIMD_API
1296  std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1297  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1298  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1299  detail::is_type<T0, int16_t, uint16_t, int32_t,
1300  uint32_t, int64_t, uint64_t>() &&
1301  detail::is_type<T1, int16_t, uint16_t, int32_t,
1302  uint32_t, int64_t, uint64_t>() &&
1303  detail::is_type<T2, int16_t, uint16_t, int32_t,
1304  uint32_t, int64_t, uint64_t>(),
1305  std::remove_const_t<T0>>
1306  rol(T1 src0, T2 src1) {
1307  simd<T1, 1> Src0 = src0;
1308  simd<T0, 1> Result = rol<T0, T1, 1, T2>(Src0, src1);
1309  return Result[0];
1310 }
1311 
1320 template <typename T0, typename T1, int SZ>
1321 __ESIMD_API std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t,
1322  uint32_t, int64_t, uint64_t>() &&
1323  detail::is_type<T1, int16_t, uint16_t, int32_t,
1324  uint32_t, int64_t, uint64_t>(),
1327  return __esimd_ror<T0, T1, SZ>(src0.data(), src1.data());
1328 }
1329 
1338 template <typename T0, typename T1, int SZ, typename U>
1339 __ESIMD_API
1340  std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
1341  int64_t, uint64_t>() &&
1342  detail::is_type<T1, int16_t, uint16_t, int32_t,
1343  uint32_t, int64_t, uint64_t>() &&
1344  detail::is_type<U, int16_t, uint16_t, int32_t,
1345  uint32_t, int64_t, uint64_t>(),
1347  ror(simd<T1, SZ> src0, U src1) {
1348  simd<T1, SZ> Src1 = src1;
1349  return esimd::ror<T0>(src0, Src1);
1350 }
1351 
1359 template <typename T0, typename T1, typename T2>
1360 __ESIMD_API
1361  std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1362  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1363  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1364  detail::is_type<T0, int16_t, uint16_t, int32_t,
1365  uint32_t, int64_t, uint64_t>() &&
1366  detail::is_type<T1, int16_t, uint16_t, int32_t,
1367  uint32_t, int64_t, uint64_t>() &&
1368  detail::is_type<T2, int16_t, uint16_t, int32_t,
1369  uint32_t, int64_t, uint64_t>(),
1370  std::remove_const_t<T0>>
1371  ror(T1 src0, T2 src1) {
1372  simd<T1, 1> Src0 = src0;
1373  simd<T0, 1> Result = esimd::ror<T0, T1, 1, T2>(Src0, src1);
1374  return Result[0];
1375 }
1376 
1378 
1381 
1394 template <typename T1, typename T2, typename T3, typename T4, int N,
1395  class Sat = saturation_off_tag>
1396 __ESIMD_API std::enable_if_t<
1397  detail::is_dword_type<T1>::value && detail::is_dword_type<T2>::value &&
1398  detail::is_dword_type<T3>::value && detail::is_dword_type<T4>::value,
1401 #if defined(__SYCL_DEVICE_ONLY__)
1402  simd<T1, N> Result;
1403  simd<T2, N> Src0 = src0;
1404  simd<T3, N> Src1 = src1;
1405  simd<T4, N> Src2 = src2;
1406  if constexpr (std::is_same_v<Sat, saturation_off_tag>) {
1407  if constexpr (std::is_unsigned<T1>::value) {
1408  if constexpr (std::is_unsigned<T2>::value) {
1409  Result = __esimd_uudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1410  Src2.data());
1411  } else {
1412  Result = __esimd_usdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1413  Src2.data());
1414  }
1415  } else {
1416  if constexpr (std::is_unsigned<T2>::value) {
1417  Result = __esimd_sudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1418  Src2.data());
1419  } else {
1420  Result = __esimd_ssdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1421  Src2.data());
1422  }
1423  }
1424  } else {
1425  if constexpr (std::is_unsigned<T1>::value) {
1426  if constexpr (std::is_unsigned<T2>::value) {
1427  Result = __esimd_uudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1428  Src2.data());
1429  } else {
1430  Result = __esimd_usdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1431  Src2.data());
1432  }
1433  } else {
1434  if constexpr (std::is_unsigned<T2>::value) {
1435  Result = __esimd_sudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1436  Src2.data());
1437  } else {
1438  Result = __esimd_ssdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1439  Src2.data());
1440  }
1441  }
1442  }
1443  return Result;
1444 #else
1445  __ESIMD_UNSUPPORTED_ON_HOST;
1446 #endif // __SYCL_DEVICE_ONLY__
1447 }
1448 
1449 // reduction functions
1450 namespace detail {
1451 template <typename T0, typename T1, int SZ> struct esimd_apply_sum {
1452  template <typename... T>
1454  return v1 + v2;
1455  }
1456 };
1458 template <typename T0, typename T1, int SZ> struct esimd_apply_prod {
1459  template <typename... T>
1461  return v1 * v2;
1462  }
1463 };
1465 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
1466  template <typename... T>
1468  if constexpr (std::is_floating_point<T1>::value) {
1469  return __spirv_ocl_fmax<T1, SZ>(v1.data(), v2.data());
1470  } else if constexpr (std::is_unsigned<T1>::value) {
1471  return __esimd_umax<T1, SZ>(v1.data(), v2.data());
1472  } else {
1473  return __esimd_smax<T1, SZ>(v1.data(), v2.data());
1474  }
1475  }
1476 };
1478 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
1479  template <typename... T>
1481  if constexpr (std::is_floating_point<T1>::value) {
1482  return __spirv_ocl_fmin<T1, SZ>(v1.data(), v2.data());
1483  } else if constexpr (std::is_unsigned<T1>::value) {
1484  return __esimd_umin<T1, SZ>(v1.data(), v2.data());
1485  } else {
1486  return __esimd_smin<T1, SZ>(v1.data(), v2.data());
1487  }
1488  }
1489 };
1490 
1491 template <typename T0, typename T1, int SZ,
1492  template <typename RT, typename T, int N> class OpType>
1494  if constexpr (SZ == 1) {
1495  return v[0];
1496  } else {
1497  static_assert(detail::isPowerOf2(SZ),
1498  "Invaid input for reduce_single - the vector size must "
1499  "be power of two.");
1500  constexpr int N = SZ / 2;
1501  simd<T0, N> tmp = OpType<T0, T1, N>()(v.template select<N, 1>(0),
1502  v.template select<N, 1>(N));
1503  return reduce_single<T0, T0, N, OpType>(tmp);
1504  }
1505 }
1506 
1507 template <typename T0, typename T1, int N1, int N2,
1508  template <typename RT, typename T, int N> class OpType>
1510  if constexpr (N1 == N2) {
1511  simd<T0, N1> tmp = OpType<T0, T1, N1>()(v1, v2);
1512  return reduce_single<T0, T0, N1, OpType>(tmp);
1513  } else if constexpr (N1 < N2) {
1514  simd<T0, N1> tmp1 = OpType<T0, T1, N1>()(v1, v2.template select<N1, 1>(0));
1515  constexpr int N = N2 - N1;
1516  using NT = simd<T0, N>;
1517  NT tmp2 = convert<T0>(v2.template select<N, 1>(N1).read());
1518  return reduce_pair<T0, T0, N1, N, OpType>(tmp1, tmp2);
1519  } else {
1520  static_assert(detail::isPowerOf2(N1),
1521  "Invaid input for reduce_pair - N1 must be power of two.");
1522  constexpr int N = N1 / 2;
1523  simd<T0, N> tmp = OpType<T0, T1, N>()(v1.template select<N, 1>(0),
1524  v1.template select<N, 1>(N));
1525  using NT = simd<T0, N2>;
1526  NT tmp2 = convert<T0>(v2);
1527  return reduce_pair<T0, T0, N, N2, OpType>(tmp, tmp2);
1528  }
1529 }
1530 
1531 template <typename T0, typename T1, int SZ,
1532  template <typename RT, typename T, int N> class OpType>
1533 T0 reduce(simd<T1, SZ> v) {
1534  constexpr bool isPowerOf2 = detail::isPowerOf2(SZ);
1535  if constexpr (isPowerOf2) {
1536  return reduce_single<T0, T1, SZ, OpType>(v);
1537  } else {
1538  constexpr unsigned N1 = 1u << detail::log2<SZ>();
1539  constexpr unsigned N2 = SZ - N1;
1540 
1541  simd<T1, N1> v1 = v.template select<N1, 1>(0);
1542  simd<T1, N2> v2 = v.template select<N2, 1>(N1);
1543  return reduce_pair<T0, T1, N1, N2, OpType>(v1, v2);
1544  }
1545 };
1546 
1547 template <typename T0, typename T1, int SZ>
1548 ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd<T1, SZ> v) {
1549  using TT = detail::computation_type_t<simd<T1, SZ>>;
1550  using RT = typename TT::element_type;
1551  T0 retv = reduce<RT, T1, SZ, esimd_apply_sum>(v);
1552  return retv;
1553 }
1554 
1555 template <typename T0, typename T1, int SZ>
1556 ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd<T1, SZ> v) {
1557  using TT = detail::computation_type_t<simd<T1, SZ>>;
1558  using RT = typename TT::element_type;
1559  T0 retv = reduce<RT, T1, SZ, esimd_apply_prod>(v);
1560  return retv;
1561 }
1562 } // namespace detail
1564 
1572 template <typename T0, typename T1, int SZ>
1573 ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd<T1, SZ> v) {
1574  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_max>(v);
1575  return retv;
1576 }
1577 
1585 template <typename T0, typename T1, int SZ>
1586 ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd<T1, SZ> v) {
1587  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_min>(v);
1588  return retv;
1589 }
1590 
1604 // TODO 1) enforce BinaryOperation constraints 2) support std::minimum/maximum
1605 template <typename T0, typename T1, int SZ, typename BinaryOperation>
1606 ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd<T1, SZ> v, BinaryOperation op) {
1607  if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1608  std::plus<>>::value) {
1609  T0 retv = detail::sum<T0>(v);
1610  return retv;
1611  } else if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1612  std::multiplies<>>::value) {
1613  T0 retv = detail::prod<T0>(v);
1614  return retv;
1615  }
1616 }
1617 
1620 
1625 enum class bfn_t : uint8_t { x = 0xAA, y = 0xCC, z = 0xF0 };
1627 static constexpr bfn_t operator~(bfn_t x) {
1628  uint8_t val = static_cast<uint8_t>(x);
1629  uint8_t res = ~val;
1630  return static_cast<bfn_t>(res);
1631 }
1633 static constexpr bfn_t operator|(bfn_t x, bfn_t y) {
1634  uint8_t arg0 = static_cast<uint8_t>(x);
1635  uint8_t arg1 = static_cast<uint8_t>(y);
1636  uint8_t res = arg0 | arg1;
1637  return static_cast<bfn_t>(res);
1638 }
1640 static constexpr bfn_t operator&(bfn_t x, bfn_t y) {
1641  uint8_t arg0 = static_cast<uint8_t>(x);
1642  uint8_t arg1 = static_cast<uint8_t>(y);
1643  uint8_t res = arg0 & arg1;
1644  return static_cast<bfn_t>(res);
1645 }
1647 static constexpr bfn_t operator^(bfn_t x, bfn_t y) {
1648  uint8_t arg0 = static_cast<uint8_t>(x);
1649  uint8_t arg1 = static_cast<uint8_t>(y);
1650  uint8_t res = arg0 ^ arg1;
1651  return static_cast<bfn_t>(res);
1652 }
1653 
1662 template <bfn_t FuncControl, typename T, int N>
1663 __ESIMD_API std::enable_if_t<std::is_integral_v<T>, __ESIMD_NS::simd<T, N>>
1664 bfn(__ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
1665  __ESIMD_NS::simd<T, N> src2) {
1666  if constexpr ((sizeof(T) == 8) || ((sizeof(T) == 1) && (N % 4 == 0)) ||
1667  ((sizeof(T) == 2) && (N % 2 == 0))) {
1668  // Bitcast Nx8-byte vectors to 2xN vectors of 4-byte integers.
1669  // Bitcast Nx1-byte vectors to N/4 vectors of 4-byte integers.
1670  // Bitcast Nx2-byte vectors to N/2 vectors of 4-byte integers.
1671  auto Result = __ESIMD_NS::bfn<FuncControl>(
1672  src0.template bit_cast_view<int32_t>().read(),
1673  src1.template bit_cast_view<int32_t>().read(),
1674  src2.template bit_cast_view<int32_t>().read());
1675  return Result.template bit_cast_view<T>();
1676  } else if constexpr (sizeof(T) == 2 || sizeof(T) == 4) {
1677  constexpr uint8_t FC = static_cast<uint8_t>(FuncControl);
1678  return __esimd_bfn<FC, T, N>(src0.data(), src1.data(), src2.data());
1679  } else if constexpr (N % 2 == 0) {
1680  // Bitcast Nx1-byte vectors (N is even) to N/2 vectors of 2-byte integers.
1681  auto Result = __ESIMD_NS::bfn<FuncControl>(
1682  src0.template bit_cast_view<int16_t>().read(),
1683  src1.template bit_cast_view<int16_t>().read(),
1684  src2.template bit_cast_view<int16_t>().read());
1685  return Result.template bit_cast_view<T>();
1686  } else {
1687  // Odd number of 1-byte elements.
1688  __ESIMD_NS::simd<T, N + 1> Src0, Src1, Src2;
1689  Src0.template select<N, 1>() = src0;
1690  Src1.template select<N, 1>() = src1;
1691  Src2.template select<N, 1>() = src2;
1692  auto Result = __ESIMD_NS::bfn<FuncControl>(Src0, Src1, Src2);
1693  return Result.template select<N, 1>();
1694  }
1695 }
1696 
1704 template <bfn_t FuncControl, typename T>
1705 ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
1706  __ESIMD_DNS::is_esimd_scalar<T>::value && std::is_integral_v<T>, T>
1707 bfn(T src0, T src1, T src2) {
1708  __ESIMD_NS::simd<T, 1> Src0 = src0;
1709  __ESIMD_NS::simd<T, 1> Src1 = src1;
1710  __ESIMD_NS::simd<T, 1> Src2 = src2;
1711  __ESIMD_NS::simd<T, 1> Result =
1712  esimd::bfn<FuncControl, T, 1>(Src0, Src1, Src2);
1713  return Result[0];
1714 }
1715 
1717 
1724 template <int N>
1725 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1726 addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
1727  __ESIMD_NS::simd<uint32_t, N> src1) {
1728  std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
1729  __ESIMD_DNS::vector_type_t<uint32_t, N>>
1730  Result = __esimd_addc<uint32_t, N>(src0.data(), src1.data());
1731 
1732  carry = Result.first;
1733  return Result.second;
1734 }
1735 
1742 template <int N>
1743 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1744 addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
1745  uint32_t src1) {
1746  __ESIMD_NS::simd<uint32_t, N> Src1V = src1;
1747  return addc(carry, src0, Src1V);
1748 }
1749 
1756 template <int N>
1757 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1758 addc(__ESIMD_NS::simd<uint32_t, N> &carry, uint32_t src0,
1759  __ESIMD_NS::simd<uint32_t, N> src1) {
1760  __ESIMD_NS::simd<uint32_t, N> Src0V = src0;
1761  return addc(carry, Src0V, src1);
1762 }
1763 
1770 __ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1) {
1771  __ESIMD_NS::simd<uint32_t, 1> CarryV = carry;
1772  __ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
1773  __ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
1774  __ESIMD_NS::simd<uint32_t, 1> Res = addc(CarryV, Src0V, Src1V);
1775  carry = CarryV[0];
1776  return Res[0];
1777 }
1778 
1786 template <int N>
1787 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1788 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
1789  __ESIMD_NS::simd<uint32_t, N> src1) {
1790  std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
1791  __ESIMD_DNS::vector_type_t<uint32_t, N>>
1792  Result = __esimd_subb<uint32_t, N>(src0.data(), src1.data());
1793 
1794  borrow = Result.first;
1795  return Result.second;
1796 }
1797 
1805 template <int N>
1806 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1807 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
1808  uint32_t src1) {
1809  __ESIMD_NS::simd<uint32_t, N> Src1V = src1;
1810  return subb(borrow, src0, Src1V);
1811 }
1812 
1820 template <int N>
1821 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1822 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, uint32_t src0,
1823  __ESIMD_NS::simd<uint32_t, N> src1) {
1824  __ESIMD_NS::simd<uint32_t, N> Src0V = src0;
1825  return subb(borrow, Src0V, src1);
1826 }
1827 
1835 __ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1) {
1836  __ESIMD_NS::simd<uint32_t, 1> BorrowV = borrow;
1837  __ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
1838  __ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
1839  __ESIMD_NS::simd<uint32_t, 1> Res = subb(BorrowV, Src0V, Src1V);
1840  borrow = BorrowV[0];
1841  return Res[0];
1842 }
1843 
1846 __ESIMD_API uint64_t rdtsc() {
1847 #ifdef __SYCL_DEVICE_ONLY__
1848  return __spirv_ReadClockKHR(0);
1849 #else
1850  __ESIMD_UNSUPPORTED_ON_HOST;
1851 #endif
1852 }
1853 
1855 
1856 } // namespace ext::intel::esimd
1857 } // namespace _V1
1858 } // namespace sycl
const auto & data() const noexcept
Definition: simd.hpp:1673
Definition: simd.hpp:1387
This class represents a reference to a sub-region of a base simd object.
Definition: simd_view.hpp:37
typename ShapeTy::element_type element_type
The element type of this class, which could be different from the element type of the base object typ...
Definition: simd_view.hpp:64
The main simd vector class.
Definition: simd.hpp:53
__ESIMD_API std::enable_if_t< detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), simd< T0, SZ > > ror(simd< T1, SZ > src0, simd< T1, SZ > src1)
Rotate right operation with two vector inputs.
Definition: math.hpp:1325
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > asr(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Arithmetical Shift Right (vector version)
Definition: math.hpp:1123
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > lsr(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Logical Shift Right (vector version)
Definition: math.hpp:1050
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), uint > pack_mask(simd_mask< N > src0)
Pack a simd_mask into a single unsigned 32-bit integer value.
Definition: math.hpp:744
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > shr(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Shift right operation (vector version)
Definition: math.hpp:1194
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&(sizeof(T)==4), simd< T, N > > fbl(simd< T, N > src)
Find the per element number of the first bit set in the source operand starting from the least signif...
Definition: math.hpp:845
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > shl(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Shift left operation (vector version)
Definition: math.hpp:950
__ESIMD_API std::enable_if_t<(std::is_same_v< T, ushort >||std::is_same_v< T, uint >) &&(N > 0 &&N<=32), uint > ballot(simd< T, N > mask)
Compare source vector elements against zero and return a bitfield combining the comparison result.
Definition: math.hpp:783
__ESIMD_API std::enable_if_t< detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), simd< T0, SZ > > rol(simd< T1, SZ > src0, simd< T1, SZ > src1)
Rotate left operation with two vector inputs.
Definition: math.hpp:1260
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< std::is_integral< T >::value &&(sizeof(T)<=4), simd< uint32_t, N > > cbit(simd< T, N > src)
Count number of bits set in the source operand per element.
Definition: math.hpp:802
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_signed< T >::value &&(sizeof(T)==4), simd< T, N > > fbh(simd< T, N > src)
Find the per element number of the first bit set in the source operand starting from the most signifi...
Definition: math.hpp:889
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), simd_mask< N > > unpack_mask(uint src0)
Unpack an unsigned 32-bit integer value into a simd_mask.
Definition: math.hpp:758
#define __ESIMD_INTRINSIC_DEF(name)
Definition: math.hpp:608
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndu(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-up (also known as ceil).
Definition: math.hpp:652
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndd(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-down (also known as floor).
Definition: math.hpp:643
__ESIMD_API sycl::ext::intel::esimd::simd< RT, SZ > trunc(const sycl::ext::intel::esimd::simd< float, SZ > &src0, Sat sat={})
Round to integral value using the round to zero rounding mode (vector version).
Definition: math.hpp:712
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > floor(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Floor" operation, vector version - alias of rndd.
Definition: math.hpp:679
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rnde(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-to-even (also known as round).
Definition: math.hpp:660
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > ceil(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Ceiling" operation, vector version - alias of rndu.
Definition: math.hpp:692
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndz(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-to-zero (also known as trunc).
Definition: math.hpp:668
__ESIMD_API std::enable_if_t< std::is_integral_v< T >, sycl::ext::intel::esimd::simd< T, N > > bfn(sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd< T, N > src2)
Performs binary function computation with three vector operands.
Definition: math.hpp:1663
static constexpr bfn_t operator&(bfn_t x, bfn_t y)
Definition: math.hpp:1639
bfn_t
This enum is used to encode all possible logical operations performed on the 3 input operands.
Definition: math.hpp:1624
static constexpr bfn_t operator|(bfn_t x, bfn_t y)
Definition: math.hpp:1632
static constexpr bfn_t operator~(bfn_t x)
Definition: math.hpp:1626
static constexpr bfn_t operator^(bfn_t x, bfn_t y)
Definition: math.hpp:1646
__ESIMD_API simd< T, N > cos(simd< T, N > src, Sat sat={})
Cosine.
Definition: math.hpp:440
__ESIMD_API simd< T, N > sqrt(simd< T, N > src, Sat sat={})
Square root.
Definition: math.hpp:406
__ESIMD_API simd< T, N > rsqrt(simd< T, N > src, Sat sat={})
Square root reciprocal - calculates 1/sqrt(x).
Definition: math.hpp:432
#define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:369
__ESIMD_API simd< T, N > pow(simd< T, N > src0, simd< U, N > src1, Sat sat={})
Power - calculates src0 in power of src1.
Definition: math.hpp:502
#define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:468
__ESIMD_API simd< T, N > inv(simd< T, N > src, Sat sat={})
Inversion - calculates (1/x).
Definition: math.hpp:392
__ESIMD_API simd< T, N > sqrt_ieee(simd< T, N > src, Sat sat={})
IEEE754-compliant square root. Supports float and double.
Definition: math.hpp:411
#define __ESIMD_EMATH_SPIRV_COND
Definition: math.hpp:387
__ESIMD_API simd< T, N > div_ieee(simd< T, N > src0, simd< U, N > src1, Sat sat={})
IEEE754-compliant floating-point division. Supports float and double.
Definition: math.hpp:543
__ESIMD_API simd< T, N > sin(simd< T, N > src, Sat sat={})
Sine.
Definition: math.hpp:436
#define __ESIMD_EMATH_IEEE_COND
Definition: math.hpp:384
__ESIMD_API simd< T, N > log2(simd< T, N > src, Sat sat={})
Logarithm base 2.
Definition: math.hpp:398
__ESIMD_API simd< T, N > exp2(simd< T, N > src, Sat sat={})
Exponent base 2.
Definition: math.hpp:402
ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd< T1, SZ > v)
ESIMD_DETAIL.
Definition: math.hpp:1572
__ESIMD_API SZ simd< T, SZ > src1
Definition: math.hpp:184
__ESIMD_API simd< T, SZ >(max)(simd< T
Selects component-wise the maximum of the two vectors.
ESIMD_DETAIL ESIMD_NODEBUG ESIMD_INLINE simd< T, SZ > log(simd< T, SZ > src0, Sat sat={})
Computes the natural logarithm of the given argument.
Definition: math.hpp:568
__ESIMD_API SZ simd< T, SZ > Sat sat
Definition: math.hpp:184
ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd< T1, SZ > v, BinaryOperation op)
Performs reduction over elements of the input vector.
Definition: math.hpp:1605
__ESIMD_API SZ simd< T, SZ > Sat int class simd< T, SZ >() min(simd< T, SZ > src0, T src1, Sat sat={})
Definition: math.hpp:302
__ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v< T0 >||std::is_same_v< T1, T0 >, simd< T0, SZ > > saturate(simd< T1, SZ > src)
Conversion of input vector elements of type T1 into vector of elements of type T0 with saturation.
Definition: math.hpp:72
__ESIMD_API SZ simd< T, SZ > Sat int SZ
Definition: math.hpp:215
__ESIMD_API sycl::ext::intel::esimd::simd< uint32_t, N > addc(sycl::ext::intel::esimd::simd< uint32_t, N > &carry, sycl::ext::intel::esimd::simd< uint32_t, N > src0, sycl::ext::intel::esimd::simd< uint32_t, N > src1)
Performs add with carry of 2 unsigned 32-bit vectors.
Definition: math.hpp:1725
__ESIMD_API SZ simd< T, SZ > Sat int class simd< T, SZ >() max(simd< T, SZ > src0, T src1, Sat sat={})
Definition: math.hpp:217
__ESIMD_API uint64_t rdtsc()
rdtsc - get the value of timestamp counter.
Definition: math.hpp:1845
ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd< T1, SZ > v)
Performs 'minimum' operation reduction over elements of the input vector, that is,...
Definition: math.hpp:1585
ESIMD_NODEBUG ESIMD_INLINE simd< T, SZ > exp(simd< T, SZ > src0, Sat sat={})
Computes e raised to the power of the given argument.
Definition: math.hpp:589
__ESIMD_API SZ simd< T, SZ > Sat int class Sat
Definition: math.hpp:215
__ESIMD_API sycl::ext::intel::esimd::simd< uint32_t, N > subb(sycl::ext::intel::esimd::simd< uint32_t, N > &borrow, sycl::ext::intel::esimd::simd< uint32_t, N > src0, sycl::ext::intel::esimd::simd< uint32_t, N > src1)
Performs substraction with borrow of 2 unsigned 32-bit vectors.
Definition: math.hpp:1787
__ESIMD_API std::enable_if_t< detail::is_dword_type< T1 >::value &&detail::is_dword_type< T2 >::value &&detail::is_dword_type< T3 >::value &&detail::is_dword_type< T4 >::value, simd< T1, N > > dp4a(simd< T2, N > src0, simd< T3, N > src1, simd< T4, N > src2, Sat sat={})
DP4A.
Definition: math.hpp:1399
__ESIMD_API SZ src0
Definition: math.hpp:184
ESIMD_DETAIL __ESIMD_API std::enable_if_t< !std::is_same< std::remove_const_t< TRes >, std::remove_const_t< TArg > >::value, simd< TRes, SZ > > abs(simd< TArg, SZ > src0)
Get absolute value (vector version)
Definition: math.hpp:131
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
T0 reduce(simd< T1, SZ > v)
Definition: math.hpp:1532
ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd< T1, SZ > v)
Definition: math.hpp:1555
T0 reduce_single(simd< T1, SZ > v)
Definition: math.hpp:1492
ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd< T1, SZ > v)
Definition: math.hpp:1547
T0 reduce_pair(simd< T1, N1 > v1, simd< T1, N2 > v2)
Definition: math.hpp:1508
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
Definition: common.hpp:96
autodecltype(x) x
Definition: access.hpp:18
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1459
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1466
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1479
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1452
This type tag represents "saturation off" behavior.
Definition: common.hpp:50