DPC++ Runtime
Runtime libraries for oneAPI DPC++
math.hpp
Go to the documentation of this file.
1 //==-------------- math.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement Explicit SIMD math APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
21 
22 #include <cstdint>
23 
24 namespace sycl {
25 inline namespace _V1 {
26 namespace ext::intel::esimd {
27 
40 
43 
68 template <typename T0, typename T1, int SZ>
69 __ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v<T0> ||
70  std::is_same_v<T1, T0>,
73  if constexpr (detail::is_generic_floating_point_v<T0>)
74  return __esimd_sat<T0, T1, SZ>(src.data());
75  else if constexpr (detail::is_generic_floating_point_v<T1>) {
76  if constexpr (std::is_unsigned_v<T0>)
77  return __esimd_fptoui_sat<T0, T1, SZ>(src.data());
78  else
79  return __esimd_fptosi_sat<T0, T1, SZ>(src.data());
80  } else if constexpr (std::is_unsigned_v<T0>) {
81  if constexpr (std::is_unsigned_v<T1>)
82  return __esimd_uutrunc_sat<T0, T1, SZ>(src.data());
83  else
84  return __esimd_ustrunc_sat<T0, T1, SZ>(src.data());
85  } else {
86  if constexpr (std::is_signed_v<T1>)
87  return __esimd_sstrunc_sat<T0, T1, SZ>(src.data());
88  else
89  return __esimd_sutrunc_sat<T0, T1, SZ>(src.data());
90  }
91 }
92 
94 // abs
95 namespace detail {
96 
97 template <typename TRes, typename TArg, int SZ>
98 ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
99 __esimd_abs_common_internal(simd<TArg, SZ> src0) {
100  simd<TArg, SZ> Result;
101  if constexpr (detail::is_generic_floating_point_v<TArg>) {
102  using CppT = __ESIMD_DNS::element_type_traits<TArg>::EnclosingCppT;
103  Result =
104  __ESIMD_DNS::convert_vector<TArg, CppT, SZ>(__spirv_ocl_fabs<CppT, SZ>(
105  __ESIMD_DNS::convert_vector<CppT, TArg, SZ>(src0.data())));
106  } else
107  Result = simd<TArg, SZ>(__spirv_ocl_s_abs<TArg, SZ>(src0.data()));
108  return convert<TRes>(Result);
109 }
110 
111 template <typename TRes, typename TArg>
112 
113 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<TRes>::value &&
114  detail::is_esimd_scalar<TArg>::value,
115  TRes>
116 __esimd_abs_common_internal(TArg src0) {
117  simd<TArg, 1> Src0 = src0;
118  simd<TArg, 1> Result = __esimd_abs_common_internal<TArg>(Src0);
119  return convert<TRes>(Result)[0];
120 }
121 } // namespace detail
123 
130 template <typename TRes, typename TArg, int SZ>
131 __ESIMD_API std::enable_if_t<
132  !std::is_same<std::remove_const_t<TRes>, std::remove_const_t<TArg>>::value,
135  return detail::__esimd_abs_common_internal<TRes, TArg, SZ>(src0.data());
136 }
137 
143 template <typename TRes, typename TArg>
144 __ESIMD_API std::enable_if_t<!std::is_same<std::remove_const_t<TRes>,
145  std::remove_const_t<TArg>>::value &&
146  detail::is_esimd_scalar<TRes>::value &&
147  detail::is_esimd_scalar<TArg>::value,
148  std::remove_const_t<TRes>>
149 abs(TArg src0) {
150  return detail::__esimd_abs_common_internal<TRes, TArg>(src0);
151 }
152 
160 template <typename T1, int SZ> __ESIMD_API simd<T1, SZ> abs(simd<T1, SZ> src0) {
161  return detail::__esimd_abs_common_internal<T1, T1, SZ>(src0.data());
162 }
163 
170 template <typename T1>
171 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T1>::value,
172  std::remove_const_t<T1>>
173 abs(T1 src0) {
174  return detail::__esimd_abs_common_internal<T1, T1>(src0);
175 }
176 
186 template <typename T, int SZ, class Sat = saturation_off_tag>
188  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
189 
190  if constexpr (detail::is_generic_floating_point_v<T>) {
191  using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
192  auto Result =
193  __ESIMD_DNS::convert_vector<T, CppT, SZ>(__spirv_ocl_fmax<CppT, SZ>(
194  __ESIMD_DNS::convert_vector<CppT, T, SZ>(src0.data()),
195  __ESIMD_DNS::convert_vector<CppT, T, SZ>(src1.data())));
196  if constexpr (is_sat)
197  Result = __esimd_sat<T, T, SZ>(Result);
198  return simd<T, SZ>(Result);
199  } else if constexpr (std::is_unsigned<T>::value) {
200  auto Result = __esimd_umax<T, SZ>(src0.data(), src1.data());
201  if constexpr (is_sat)
202  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
203  return simd<T, SZ>(Result);
204  } else {
205  auto Result = __esimd_smax<T, SZ>(src0.data(), src1.data());
206  if constexpr (is_sat)
207  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
208  return simd<T, SZ>(Result);
209  }
210 }
211 
222 template <typename T, int SZ, class Sat = saturation_off_tag>
223 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
224  max)(simd<T, SZ> src0, T src1, Sat sat = {}) {
225  simd<T, SZ> Src1 = src1;
226  simd<T, SZ> Result = (esimd::max)(src0, Src1, sat);
227  return Result;
228 }
229 
240 template <typename T, int SZ, class Sat = saturation_off_tag>
241 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
242  max)(T src0, simd<T, SZ> src1, Sat sat = {}) {
243  simd<T, SZ> Src0 = src0;
244  simd<T, SZ> Result = (esimd::max)(Src0, src1, sat);
245  return Result;
246 }
247 
256 template <typename T, class Sat = saturation_off_tag>
257 ESIMD_NODEBUG ESIMD_INLINE
258 std::enable_if_t<detail::is_esimd_scalar<T>::value, T>(max)(T src0, T src1,
259  Sat sat = {}) {
260  simd<T, 1> Src0 = src0;
261  simd<T, 1> Src1 = src1;
262  simd<T, 1> Result = (esimd::max)(Src0, Src1, sat);
263  return Result[0];
264 }
265 
275 template <typename T, int SZ, class Sat = saturation_off_tag>
277  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
278 
279  if constexpr (detail::is_generic_floating_point_v<T>) {
280  using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
281  auto Result =
282  __ESIMD_DNS::convert_vector<T, CppT, SZ>(__spirv_ocl_fmin<CppT, SZ>(
283  __ESIMD_DNS::convert_vector<CppT, T, SZ>(src0.data()),
284  __ESIMD_DNS::convert_vector<CppT, T, SZ>(src1.data())));
285  if constexpr (is_sat)
286  Result = __esimd_sat<T, T, SZ>(Result);
287  return simd<T, SZ>(Result);
288  } else if constexpr (std::is_unsigned<T>::value) {
289  auto Result = __esimd_umin<T, SZ>(src0.data(), src1.data());
290  if constexpr (is_sat)
291  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
292  return simd<T, SZ>(Result);
293  } else {
294  auto Result = __esimd_smin<T, SZ>(src0.data(), src1.data());
295  if constexpr (is_sat)
296  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
297  return simd<T, SZ>(Result);
298  }
299 }
300 
311 template <typename T, int SZ, class Sat = saturation_off_tag>
312 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
313  min)(simd<T, SZ> src0, T src1, Sat sat = {}) {
314  simd<T, SZ> Src1 = src1;
315  simd<T, SZ> Result = (esimd::min)(src0, Src1, sat);
316  return Result;
317 }
318 
329 template <typename T, int SZ, class Sat = saturation_off_tag>
330 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
331  min)(T src0, simd<T, SZ> src1, Sat sat = {}) {
332  simd<T, SZ> Src0 = src0;
333  simd<T, SZ> Result = (esimd::min)(Src0, src1, sat);
334  return Result;
335 }
336 
345 template <typename T, class Sat = saturation_off_tag>
346 ESIMD_NODEBUG ESIMD_INLINE
347 std::enable_if_t<detail::is_esimd_scalar<T>::value, T>(min)(T src0, T src1,
348  Sat sat = {}) {
349  simd<T, 1> Src0 = src0;
350  simd<T, 1> Src1 = src1;
351  simd<T, 1> Result = (esimd::min)(Src0, Src1, sat);
352  return Result[0];
353 }
354 
356 
359 
360 #if defined(__SYCL_DEVICE_ONLY__)
361 #define __ESIMD_VECTOR_IMPL(T, name, iname) \
362  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res = \
363  __spirv_ocl_native_##iname<__ESIMD_DNS::__raw_t<T>, N>(src.data()); \
364  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
365  return res; \
366  else \
367  return esimd::saturate<T>(simd<T, N>(res));
368 #define __ESIMD_SCALAR_IMPL(T, name, iname) \
369  __ESIMD_DNS::__raw_t<T> res = \
370  __spirv_ocl_native_##iname<__ESIMD_DNS::__raw_t<T>>(src); \
371  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
372  return res; \
373  else \
374  return esimd::saturate<T>(simd<T, 1>(res))[0];
375 #else
376 #define __ESIMD_VECTOR_IMPL(T, name, iname) return 0;
377 #define __ESIMD_SCALAR_IMPL(T, name, iname) return 0;
378 #endif // __SYCL_DEVICE_ONLY__
379 
380 #define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname) \
381  \
382  template <class T, int N, class Sat = saturation_off_tag, \
383  class = std::enable_if_t<COND>> \
384  __ESIMD_API simd<T, N> name(simd<T, N> src, Sat sat = {}) { \
385  __ESIMD_VECTOR_IMPL(T, name, iname) \
386  } \
387  \
388  \
389  template <typename T, class Sat = saturation_off_tag, \
390  class = std::enable_if_t<COND>> \
391  __ESIMD_API T name(T src, Sat sat = {}) { \
392  __ESIMD_SCALAR_IMPL(T, name, iname) \
393  }
394 
395 #define __ESIMD_EMATH_IEEE_COND \
396  detail::is_generic_floating_point_v<T> && (sizeof(T) >= 4)
397 
398 #define __ESIMD_EMATH_SPIRV_COND \
399  std::is_same_v<T, float> || std::is_same_v<T, sycl::half>
400 
403 __ESIMD_UNARY_INTRINSIC_DEF(detail::is_generic_floating_point_v<T>, inv, recip)
404 
410 
414 
415 __ESIMD_UNARY_INTRINSIC_DEF(detail::is_generic_floating_point_v<T>, sqrt, sqrt)
418 
419 template <class T, int N, class Sat = saturation_off_tag,
421  class = std::enable_if_t<__ESIMD_EMATH_IEEE_COND>>
422 __ESIMD_API simd<T, N> sqrt_ieee(simd<T, N> src, Sat sat = {}) {
423  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res =
424  __esimd_ieee_sqrt<T, N>(src.data());
425  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
426  return res;
427  else
428  return esimd::saturate<T>(simd<T, N>(res));
429 }
430 
432 template <typename T, class Sat = saturation_off_tag,
433  class = std::enable_if_t<__ESIMD_EMATH_IEEE_COND>>
434 __ESIMD_API T sqrt_ieee(T src, Sat sat = {}) {
435  simd<T, 1> src_vec = src;
436  simd<T, 1> res = sqrt_ieee<T, 1>(src_vec, sat);
437  return res[0];
438 }
439 
444 
448 
452 
453 template <class T, int N, class Sat = saturation_off_tag>
457 __ESIMD_API std::enable_if_t<std::is_same_v<T, double>, simd<double, N>>
458 rsqrt(simd<T, N> src, Sat sat = {}) {
459  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
460  return inv(sqrt(src));
461  else
462  return esimd::saturate<double>(inv(sqrt(src)));
463 }
464 
466 template <class T, class Sat = saturation_off_tag>
467 __ESIMD_API std::enable_if_t<std::is_same_v<T, double>, double>
468 rsqrt(T src, Sat sat = {}) {
469  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
470  return inv(sqrt(src));
471  else
472  return esimd::saturate<double>(inv(sqrt(src)));
473 }
474 
475 #undef __ESIMD_UNARY_INTRINSIC_DEF
476 #undef __ESIMD_VECTOR_IMPL
477 #undef __ESIMD_SCALAR_IMPL
478 
479 #define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname) \
480  \
481  template <class T, int N, class U, class Sat = saturation_off_tag, \
482  class = std::enable_if_t<COND>> \
483  __ESIMD_API simd<T, N> name(simd<T, N> src0, simd<U, N> src1, \
484  Sat sat = {}) { \
485  using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>; \
486  RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data()); \
487  RawVecT res_raw = __esimd_##iname<T, N>(src0.data(), src1_raw_conv); \
488  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
489  return res_raw; \
490  else \
491  return esimd::saturate<T>(simd<T, N>(res_raw)); \
492  } \
493  \
494  \
495  template <class T, int N, class U, class Sat = saturation_off_tag, \
496  class = std::enable_if_t<COND>> \
497  __ESIMD_API simd<T, N> name(simd<T, N> src0, U src1, Sat sat = {}) { \
498  return name<T, N, U>(src0, simd<U, N>(src1), sat); \
499  } \
500  \
501  \
502  template <class T, class U, class Sat = saturation_off_tag, \
503  class = std::enable_if_t<COND>> \
504  __ESIMD_API T name(T src0, U src1, Sat sat = {}) { \
505  simd<T, 1> res = name<T, 1, U>(simd<T, 1>(src0), simd<U, 1>(src1), sat); \
506  return res[0]; \
507  }
508 
511 template <class T, int N, class U, class Sat = saturation_off_tag,
512  class = std::enable_if_t<__ESIMD_EMATH_SPIRV_COND>>
514 #if defined(__SYCL_DEVICE_ONLY__)
515  using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>;
516  RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data());
517  RawVecT res_raw = __spirv_ocl_native_powr<__ESIMD_DNS::__raw_t<T>, N>(
518  src0.data(), src1_raw_conv);
519  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
520  return res_raw;
521  else
522  return esimd::saturate<T>(simd<T, N>(res_raw));
523 #else
524  return 0;
525 #endif // __SYCL_DEVICE_ONLY__
526 }
527 
529 template <class T, int N, class U, class Sat = saturation_off_tag,
530  class = std::enable_if_t<__ESIMD_EMATH_SPIRV_COND>>
531 __ESIMD_API simd<T, N> pow(simd<T, N> src0, U src1, Sat sat = {}) {
532  return pow<T, N, U>(src0, simd<U, N>(src1), sat);
533 }
534 
536 template <class T, class U, class Sat = saturation_off_tag,
537  class = std::enable_if_t<__ESIMD_EMATH_SPIRV_COND>>
538 __ESIMD_API T pow(T src0, U src1, Sat sat = {}) {
539 #if defined(__SYCL_DEVICE_ONLY__)
540  using ResT = __ESIMD_DNS::__raw_t<T>;
541  ResT src1_raw_conv = detail::convert_scalar<T, U>(src1);
542  ResT res_raw =
543  __spirv_ocl_native_powr<__ESIMD_DNS::__raw_t<T>>(src0, src1_raw_conv);
544  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
545  return res_raw;
546  else
547  return esimd::saturate<T>(simd<T, 1>(res_raw))[0];
548 #else
549  return 0;
550 #endif // __SYCL_DEVICE_ONLY__
551 }
552 
555 
556 #undef __ESIMD_BINARY_INTRINSIC_DEF
557 #undef __ESIMD_EMATH_IEEE_COND
558 #undef __ESIMD_EMATH_SPIRV_COND
559 
561 
564 
566 namespace detail {
567 // std::numbers::ln2_v<float> in c++20
568 constexpr float ln2 = 0.69314718f;
569 // std::numbers::log2e_v<float> in c++20
570 constexpr float log2e = 1.442695f;
571 } // namespace detail
573 
578 template <class T, int SZ, class Sat = saturation_off_tag>
579 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> log(simd<T, SZ> src0, Sat sat = {}) {
580  using CppT = __ESIMD_DNS::__cpp_t<T>;
581  simd<T, SZ> Result =
582  esimd::log2<T, SZ, saturation_off_tag>(src0) * detail::ln2;
583 
584  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
585  return Result;
586  else
587  return esimd::saturate<T>(Result);
588 }
589 
590 template <class T, class Sat = saturation_off_tag>
591 ESIMD_NODEBUG ESIMD_INLINE T log(T src0, Sat sat = {}) {
592  return esimd::log<T, 1>(src0, sat)[0];
593 }
594 
599 template <class T, int SZ, class Sat = saturation_off_tag>
600 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> exp(simd<T, SZ> src0, Sat sat = {}) {
601  using CppT = __ESIMD_DNS::__cpp_t<T>;
602  return esimd::exp2<T, SZ>(src0 * detail::log2e, sat);
603 }
604 
605 template <class T, class Sat = saturation_off_tag>
606 ESIMD_NODEBUG ESIMD_INLINE T exp(T src0, Sat sat = {}) {
607  return esimd::exp<T, 1>(src0, sat)[0];
608 }
609 
611 
614 
616 // Rounding intrinsics.
618 
619 #define __ESIMD_INTRINSIC_DEF(name) \
620  \
621  \
622  \
624  \
625  \
626  \
627  template <typename T, int SZ, class Sat = __ESIMD_NS::saturation_off_tag> \
628  __ESIMD_API __ESIMD_NS::simd<T, SZ> name(__ESIMD_NS::simd<float, SZ> src0, \
629  Sat sat = {}) { \
630  __ESIMD_NS::simd<float, SZ> Result = __esimd_##name<SZ>(src0.data()); \
631  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>) \
632  return Result; \
633  else if constexpr (!std::is_same_v<float, T>) { \
634  auto RawRes = __ESIMD_NS::saturate<float>(Result).data(); \
635  return __ESIMD_DNS::convert_vector<T, float, SZ>(std::move(RawRes)); \
636  } else { \
637  return __ESIMD_NS::saturate<T>(Result); \
638  } \
639  } \
640  \
641  template <typename T, class Sat = __ESIMD_NS::saturation_off_tag> \
642  __ESIMD_API T name(float src0, Sat sat = {}) { \
643  __ESIMD_NS::simd<float, 1> Src0 = src0; \
644  __ESIMD_NS::simd<T, 1> Result = name<T>(Src0, sat); \
645  return Result[0]; \
646  }
647 
656 
665 
673 
681 
682 #undef __ESIMD_INTRINSIC_DEF
684 
687 
689 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
690 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
691 floor(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
692  return esimd::rndd<RT, SZ>(src0, sat);
693 }
694 
696 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
697 ESIMD_INLINE RT floor(float src0, Sat sat = {}) {
698  return esimd::rndd<RT, 1U>(src0, sat)[0];
699 }
700 
702 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
703 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
704 ceil(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
705  return esimd::rndu<RT, SZ>(src0, sat);
706 }
707 
709 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
710 ESIMD_INLINE RT ceil(float src0, Sat sat = {}) {
711  return esimd::rndu<RT, 1U>(src0, sat);
712 }
713 
722 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
723 __ESIMD_API __ESIMD_NS::simd<RT, SZ>
724 trunc(const __ESIMD_NS::simd<float, SZ> &src0, Sat sat = {}) {
725  return esimd::rndz<RT, SZ>(src0, sat);
726 }
727 
735 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
736 __ESIMD_API RT trunc(float src0, Sat sat = {}) {
737  return esimd::rndz<RT, 1U>(src0, sat)[0];
738 }
739 
741 
744 
753 template <int N>
754 ESIMD_NODEBUG
755  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), uint>
757  return __esimd_pack_mask<N>(src0.data());
758 }
759 
767 template <int N>
768 ESIMD_NODEBUG
769  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), simd_mask<N>>
771  return __esimd_unpack_mask<N>(src0);
772 }
773 
776 template <int N>
777 __ESIMD_API std::enable_if_t<(N != 8 && N != 16 && N < 32), uint>
779  simd_mask<(N < 8 ? 8 : N < 16 ? 16 : 32)> src_0 = 0;
780  src_0.template select<N, 1>() = src0.template bit_cast_view<ushort>();
781  return esimd::pack_mask(src_0);
782 }
783 
790 template <typename T, int N>
791 __ESIMD_API
792  std::enable_if_t<(std::is_same_v<T, ushort> || std::is_same_v<T, uint>) &&
793  (N > 0 && N <= 32),
795  ballot(simd<T, N> mask) {
796  simd_mask<N> cmp = (mask != 0);
797  if constexpr (N == 8 || N == 16 || N == 32) {
798  return __esimd_pack_mask<N>(cmp.data());
799  } else {
800  constexpr int N1 = (N <= 8 ? 8 : N <= 16 ? 16 : 32);
801  simd<uint16_t, N1> res = 0;
802  res.template select<N, 1>() = cmp.data();
803  return __esimd_pack_mask<N1>(res.data());
804  }
805 }
806 
811 template <typename T, int N>
812 ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
813  std::is_integral<T>::value && (sizeof(T) <= 4), simd<uint32_t, N>>
814 cbit(simd<T, N> src) {
815  return __esimd_cbit<T, N>(src.data());
816 }
817 
820 template <typename T>
821 __ESIMD_API
822  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) <= 4), uint32_t>
823  cbit(T src) {
824  simd<T, 1> Src = src;
825  simd<uint32_t, 1> Result = esimd::cbit(Src);
826  return Result[0];
827 }
828 
833 template <typename BaseTy, typename RegionTy>
834 __ESIMD_API std::enable_if_t<
835  std::is_integral<
837  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) <= 4) &&
839  uint32_t>
841  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
842  simd<Ty, 1> Src = src;
843  simd<uint32_t, 1> Result = esimd::cbit(Src);
844  return Result[0];
845 }
846 
854 template <typename T, int N>
855 __ESIMD_API
856  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), simd<T, N>>
857  fbl(simd<T, N> src) {
858  return __esimd_fbl<T, N>(src.data());
859 }
860 
863 template <typename T>
864 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
865 fbl(T src) {
866  simd<T, 1> Src = src;
867  simd<T, 1> Result = esimd::fbl(Src);
868  return Result[0];
869 }
870 
876 template <typename BaseTy, typename RegionTy>
877 __ESIMD_API std::enable_if_t<
878  std::is_integral<
880  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
884  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
885  simd<Ty, 1> Src = src;
886  simd<Ty, 1> Result = esimd::fbl(Src);
887  return Result[0];
888 }
889 
897 template <typename T, int N>
898 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
899  std::is_signed<T>::value && (sizeof(T) == 4),
901 fbh(simd<T, N> src) {
902  return __esimd_sfbh<T, N>(src.data());
903 }
904 
911 template <typename T, int N>
912 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
913  !std::is_signed<T>::value && (sizeof(T) == 4),
915 fbh(simd<T, N> src) {
916  return __esimd_ufbh<T, N>(src.data());
917 }
918 
921 template <typename T>
922 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
923 fbh(T src) {
924  simd<T, 1> Src = src;
925  simd<T, 1> Result = esimd::fbh(Src);
926  return Result[0];
927 }
928 
934 template <typename BaseTy, typename RegionTy>
935 __ESIMD_API std::enable_if_t<
936  std::is_integral<
938  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
942  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
943  simd<Ty, 1> Src = src;
944  simd<Ty, 1> Result = esimd::fbh(Src);
945  return Result[0];
946 }
947 
958 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
959 __ESIMD_API
960  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
963  using ComputationTy =
964  __ESIMD_DNS::computation_type_t<decltype(src0), int32_t>;
965  ComputationTy Src0 = src0;
966  ComputationTy Src1 = src1;
967 
968  if constexpr (std::is_same_v<Sat, saturation_on_tag>) {
969  if constexpr (std::is_unsigned<T0>::value) {
970  if constexpr (std::is_unsigned<
971  typename ComputationTy::element_type>::value)
972  return __esimd_uushl_sat<T0, typename ComputationTy::element_type, SZ>(
973  Src0.data(), Src1.data());
974  else
975  return __esimd_usshl_sat<T0, typename ComputationTy::element_type, SZ>(
976  Src0.data(), Src1.data());
977  } else {
978  if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
979  return __esimd_sushl_sat<T0, typename ComputationTy::element_type, SZ>(
980  Src0.data(), Src1.data());
981  else
982  return __esimd_ssshl_sat<T0, typename ComputationTy::element_type, SZ>(
983  Src0.data(), Src1.data());
984  }
985  } else {
986  if constexpr (std::is_unsigned<T0>::value) {
987  if constexpr (std::is_unsigned<
988  typename ComputationTy::element_type>::value)
989  return __esimd_uushl<T0, typename ComputationTy::element_type, SZ>(
990  Src0.data(), Src1.data());
991  else
992  return __esimd_usshl<T0, typename ComputationTy::element_type, SZ>(
993  Src0.data(), Src1.data());
994  } else {
995  if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
996  return __esimd_sushl<T0, typename ComputationTy::element_type, SZ>(
997  Src0.data(), Src1.data());
998  else
999  return __esimd_ssshl<T0, typename ComputationTy::element_type, SZ>(
1000  Src0.data(), Src1.data());
1001  }
1002  }
1003 }
1004 
1014 template <typename T0, typename T1, int SZ, typename U,
1015  class Sat = saturation_off_tag>
1016 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1017  std::is_integral<T1>::value &&
1018  std::is_integral<U>::value,
1020 shl(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1021  simd<U, SZ> Src1 = src1;
1022  return shl<T0, T1, SZ>(src0, Src1, sat);
1023 }
1024 
1034 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1035 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1036  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1037  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1038  std::is_integral<T0>::value &&
1039  std::is_integral<T1>::value &&
1040  std::is_integral<T2>::value,
1041  std::remove_const_t<T0>>
1042 shl(T1 src0, T2 src1, Sat sat = {}) {
1043  simd<T1, 1> Src0 = src0;
1044  simd<T0, 1> Result = shl<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1045  return Result[0];
1046 }
1047 
1058 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
1059 __ESIMD_API
1060  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
1063  using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
1064  typedef typename std::make_unsigned<IntermedTy>::type ComputationTy;
1067  simd<ComputationTy, SZ> Result = Src0.data() >> Src1.data();
1068 
1069  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
1070  return Result;
1071  else
1072  return saturate<T0>(Result);
1073 }
1074 
1085 template <typename T0, typename T1, int SZ, typename U,
1086  class Sat = saturation_off_tag>
1087 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1088  std::is_integral<T1>::value &&
1089  std::is_integral<U>::value,
1091 lsr(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1092  simd<T1, SZ> Src1 = src1;
1093  return lsr<T0, T1, SZ>(src0, Src1, sat);
1094 }
1095 
1106 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1107 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1108  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1109  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1110  std::is_integral<T0>::value &&
1111  std::is_integral<T1>::value &&
1112  std::is_integral<T2>::value,
1113  std::remove_const_t<T0>>
1114 lsr(T1 src0, T2 src1, Sat sat = {}) {
1115  simd<T1, 1> Src0 = src0;
1116  simd<T0, 1> Result = lsr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1117 
1118  return Result[0];
1119 }
1120 
1131 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
1132 __ESIMD_API
1133  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
1136  using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
1137  typedef typename std::make_signed<IntermedTy>::type ComputationTy;
1140  simd<ComputationTy, SZ> Result = Src0 >> Src1;
1141  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
1142  return Result;
1143  else
1144  return saturate<T0>(Result);
1145 }
1146 
1157 template <typename T0, typename T1, int SZ, typename U,
1158  class Sat = saturation_off_tag>
1159 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1160  std::is_integral<T1>::value &&
1161  std::is_integral<U>::value,
1163 asr(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1164  simd<U, SZ> Src1 = src1;
1165  return asr<T0, T1, SZ>(src0, Src1, sat);
1166 }
1167 
1178 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1179 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1180  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1181  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1182  std::is_integral<T0>::value &&
1183  std::is_integral<T1>::value &&
1184  std::is_integral<T2>::value,
1185  std::remove_const_t<T0>>
1186 asr(T1 src0, T2 src1, Sat sat = {}) {
1187  simd<T1, 1> Src0 = src0;
1188  simd<T0, 1> Result = esimd::asr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1189  return Result[0];
1190 }
1191 
1202 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
1203 __ESIMD_API
1204  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
1207  if constexpr (std::is_unsigned<T1>::value) {
1208  return esimd::lsr<T0, T1, SZ>(src0, src1, sat);
1209  } else {
1210  return esimd::asr<T0, T1, SZ>(src0, src1, sat);
1211  }
1212 }
1213 
1224 template <typename T0, typename T1, int SZ, typename U,
1225  class Sat = saturation_off_tag>
1226 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1227  std::is_integral<T1>::value &&
1228  std::is_integral<U>::value,
1230 shr(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1231  simd<U, SZ> Src1 = src1;
1232  return shr<T0, T1, SZ>(src0, Src1, sat);
1233 }
1234 
1244 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1245 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1246  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1247  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1248  std::is_integral<T0>::value &&
1249  std::is_integral<T1>::value &&
1250  std::is_integral<T2>::value,
1251  std::remove_const_t<T0>>
1252 shr(T1 src0, T2 src1, Sat sat = {}) {
1253  simd<T1, 1> Src0 = src0;
1254  simd<T0, 1> Result = shr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1255  return Result[0];
1256 }
1257 
1266 template <typename T0, typename T1, int SZ>
1267 __ESIMD_API std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t,
1268  uint32_t, int64_t, uint64_t>() &&
1269  detail::is_type<T1, int16_t, uint16_t, int32_t,
1270  uint32_t, int64_t, uint64_t>(),
1273  return __esimd_rol<T0, T1, SZ>(src0.data(), src1.data());
1274 }
1275 
1284 template <typename T0, typename T1, int SZ, typename U>
1285 __ESIMD_API
1286  std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
1287  int64_t, uint64_t>() &&
1288  detail::is_type<T1, int16_t, uint16_t, int32_t,
1289  uint32_t, int64_t, uint64_t>() &&
1290  detail::is_type<U, int16_t, uint16_t, int32_t,
1291  uint32_t, int64_t, uint64_t>(),
1293  rol(simd<T1, SZ> src0, U src1) {
1294  simd<T1, SZ> Src1 = src1;
1295  return rol<T0>(src0, Src1);
1296 }
1297 
1305 template <typename T0, typename T1, typename T2>
1306 __ESIMD_API
1307  std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1308  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1309  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1310  detail::is_type<T0, int16_t, uint16_t, int32_t,
1311  uint32_t, int64_t, uint64_t>() &&
1312  detail::is_type<T1, int16_t, uint16_t, int32_t,
1313  uint32_t, int64_t, uint64_t>() &&
1314  detail::is_type<T2, int16_t, uint16_t, int32_t,
1315  uint32_t, int64_t, uint64_t>(),
1316  std::remove_const_t<T0>>
1317  rol(T1 src0, T2 src1) {
1318  simd<T1, 1> Src0 = src0;
1319  simd<T0, 1> Result = rol<T0, T1, 1, T2>(Src0, src1);
1320  return Result[0];
1321 }
1322 
1331 template <typename T0, typename T1, int SZ>
1332 __ESIMD_API std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t,
1333  uint32_t, int64_t, uint64_t>() &&
1334  detail::is_type<T1, int16_t, uint16_t, int32_t,
1335  uint32_t, int64_t, uint64_t>(),
1338  return __esimd_ror<T0, T1, SZ>(src0.data(), src1.data());
1339 }
1340 
1349 template <typename T0, typename T1, int SZ, typename U>
1350 __ESIMD_API
1351  std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
1352  int64_t, uint64_t>() &&
1353  detail::is_type<T1, int16_t, uint16_t, int32_t,
1354  uint32_t, int64_t, uint64_t>() &&
1355  detail::is_type<U, int16_t, uint16_t, int32_t,
1356  uint32_t, int64_t, uint64_t>(),
1358  ror(simd<T1, SZ> src0, U src1) {
1359  simd<T1, SZ> Src1 = src1;
1360  return esimd::ror<T0>(src0, Src1);
1361 }
1362 
1370 template <typename T0, typename T1, typename T2>
1371 __ESIMD_API
1372  std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1373  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1374  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1375  detail::is_type<T0, int16_t, uint16_t, int32_t,
1376  uint32_t, int64_t, uint64_t>() &&
1377  detail::is_type<T1, int16_t, uint16_t, int32_t,
1378  uint32_t, int64_t, uint64_t>() &&
1379  detail::is_type<T2, int16_t, uint16_t, int32_t,
1380  uint32_t, int64_t, uint64_t>(),
1381  std::remove_const_t<T0>>
1382  ror(T1 src0, T2 src1) {
1383  simd<T1, 1> Src0 = src0;
1384  simd<T0, 1> Result = esimd::ror<T0, T1, 1, T2>(Src0, src1);
1385  return Result[0];
1386 }
1387 
1389 
1392 
1405 template <typename T1, typename T2, typename T3, typename T4, int N,
1406  class Sat = saturation_off_tag>
1407 __ESIMD_API std::enable_if_t<
1408  detail::is_dword_type<T1>::value && detail::is_dword_type<T2>::value &&
1409  detail::is_dword_type<T3>::value && detail::is_dword_type<T4>::value,
1412 #if defined(__SYCL_DEVICE_ONLY__)
1413  simd<T1, N> Result;
1414  simd<T2, N> Src0 = src0;
1415  simd<T3, N> Src1 = src1;
1416  simd<T4, N> Src2 = src2;
1417  if constexpr (std::is_same_v<Sat, saturation_off_tag>) {
1418  if constexpr (std::is_unsigned<T1>::value) {
1419  if constexpr (std::is_unsigned<T2>::value) {
1420  Result = __esimd_uudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1421  Src2.data());
1422  } else {
1423  Result = __esimd_usdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1424  Src2.data());
1425  }
1426  } else {
1427  if constexpr (std::is_unsigned<T2>::value) {
1428  Result = __esimd_sudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1429  Src2.data());
1430  } else {
1431  Result = __esimd_ssdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1432  Src2.data());
1433  }
1434  }
1435  } else {
1436  if constexpr (std::is_unsigned<T1>::value) {
1437  if constexpr (std::is_unsigned<T2>::value) {
1438  Result = __esimd_uudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1439  Src2.data());
1440  } else {
1441  Result = __esimd_usdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1442  Src2.data());
1443  }
1444  } else {
1445  if constexpr (std::is_unsigned<T2>::value) {
1446  Result = __esimd_sudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1447  Src2.data());
1448  } else {
1449  Result = __esimd_ssdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1450  Src2.data());
1451  }
1452  }
1453  }
1454  return Result;
1455 #else
1456  __ESIMD_UNSUPPORTED_ON_HOST;
1457 #endif // __SYCL_DEVICE_ONLY__
1458 }
1459 
1460 // reduction functions
1461 namespace detail {
1462 template <typename T0, typename T1, int SZ> struct esimd_apply_sum {
1463  template <typename... T>
1465  return v1 + v2;
1466  }
1467 };
1469 template <typename T0, typename T1, int SZ> struct esimd_apply_prod {
1470  template <typename... T>
1472  return v1 * v2;
1473  }
1474 };
1476 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
1477  template <typename... T>
1479  return __ESIMD_DNS::convert_vector<T0, T1, SZ>(
1480  __ESIMD_NS::max(v1, v2).data());
1481  }
1482 };
1484 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
1485  template <typename... T>
1487  return __ESIMD_DNS::convert_vector<T0, T1, SZ>(
1488  __ESIMD_NS::min(v1, v2).data());
1489  }
1490 };
1491 
1492 template <typename T0, typename T1, int SZ,
1493  template <typename RT, typename T, int N> class OpType>
1495  if constexpr (SZ == 1) {
1496  return v[0];
1497  } else {
1498  static_assert(detail::isPowerOf2(SZ),
1499  "Invaid input for reduce_single - the vector size must "
1500  "be power of two.");
1501  constexpr int N = SZ / 2;
1502  simd<T0, N> tmp = OpType<T0, T1, N>()(v.template select<N, 1>(0),
1503  v.template select<N, 1>(N));
1504  return reduce_single<T0, T0, N, OpType>(tmp);
1505  }
1506 }
1507 
1508 template <typename T0, typename T1, int N1, int N2,
1509  template <typename RT, typename T, int N> class OpType>
1511  if constexpr (N1 == N2) {
1512  simd<T0, N1> tmp = OpType<T0, T1, N1>()(v1, v2);
1513  return reduce_single<T0, T0, N1, OpType>(tmp);
1514  } else if constexpr (N1 < N2) {
1515  simd<T0, N1> tmp1 = OpType<T0, T1, N1>()(v1, v2.template select<N1, 1>(0));
1516  constexpr int N = N2 - N1;
1517  using NT = simd<T0, N>;
1518  NT tmp2 = convert<T0>(v2.template select<N, 1>(N1).read());
1519  return reduce_pair<T0, T0, N1, N, OpType>(tmp1, tmp2);
1520  } else {
1521  static_assert(detail::isPowerOf2(N1),
1522  "Invaid input for reduce_pair - N1 must be power of two.");
1523  constexpr int N = N1 / 2;
1524  simd<T0, N> tmp = OpType<T0, T1, N>()(v1.template select<N, 1>(0),
1525  v1.template select<N, 1>(N));
1526  using NT = simd<T0, N2>;
1527  NT tmp2 = convert<T0>(v2);
1528  return reduce_pair<T0, T0, N, N2, OpType>(tmp, tmp2);
1529  }
1530 }
1531 
1532 template <typename T0, typename T1, int SZ,
1533  template <typename RT, typename T, int N> class OpType>
1534 T0 reduce(simd<T1, SZ> v) {
1535  constexpr bool isPowerOf2 = detail::isPowerOf2(SZ);
1536  if constexpr (isPowerOf2) {
1537  return reduce_single<T0, T1, SZ, OpType>(v);
1538  } else {
1539  constexpr unsigned N1 = 1u << detail::log2<SZ>();
1540  constexpr unsigned N2 = SZ - N1;
1541 
1542  simd<T1, N1> v1 = v.template select<N1, 1>(0);
1543  simd<T1, N2> v2 = v.template select<N2, 1>(N1);
1544  return reduce_pair<T0, T1, N1, N2, OpType>(v1, v2);
1545  }
1546 };
1547 
1548 template <typename T0, typename T1, int SZ>
1549 ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd<T1, SZ> v) {
1550  using TT = detail::computation_type_t<simd<T1, SZ>>;
1551  using RT = typename TT::element_type;
1552  T0 retv = reduce<RT, T1, SZ, esimd_apply_sum>(v);
1553  return retv;
1554 }
1555 
1556 template <typename T0, typename T1, int SZ>
1557 ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd<T1, SZ> v) {
1558  using TT = detail::computation_type_t<simd<T1, SZ>>;
1559  using RT = typename TT::element_type;
1560  T0 retv = reduce<RT, T1, SZ, esimd_apply_prod>(v);
1561  return retv;
1562 }
1563 } // namespace detail
1565 
1573 template <typename T0, typename T1, int SZ>
1574 ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd<T1, SZ> v) {
1575  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_max>(v);
1576  return retv;
1577 }
1578 
1586 template <typename T0, typename T1, int SZ>
1587 ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd<T1, SZ> v) {
1588  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_min>(v);
1589  return retv;
1590 }
1591 
1605 // TODO 1) enforce BinaryOperation constraints 2) support std::minimum/maximum
1606 template <typename T0, typename T1, int SZ, typename BinaryOperation>
1607 ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd<T1, SZ> v, BinaryOperation op) {
1608  if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1609  std::plus<>>::value) {
1610  T0 retv = detail::sum<T0>(v);
1611  return retv;
1612  } else if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1613  std::multiplies<>>::value) {
1614  T0 retv = detail::prod<T0>(v);
1615  return retv;
1616  }
1617 }
1618 
1621 
1626 enum class bfn_t : uint8_t { x = 0xAA, y = 0xCC, z = 0xF0 };
1628 static constexpr bfn_t operator~(bfn_t x) {
1629  uint8_t val = static_cast<uint8_t>(x);
1630  uint8_t res = ~val;
1631  return static_cast<bfn_t>(res);
1632 }
1634 static constexpr bfn_t operator|(bfn_t x, bfn_t y) {
1635  uint8_t arg0 = static_cast<uint8_t>(x);
1636  uint8_t arg1 = static_cast<uint8_t>(y);
1637  uint8_t res = arg0 | arg1;
1638  return static_cast<bfn_t>(res);
1639 }
1641 static constexpr bfn_t operator&(bfn_t x, bfn_t y) {
1642  uint8_t arg0 = static_cast<uint8_t>(x);
1643  uint8_t arg1 = static_cast<uint8_t>(y);
1644  uint8_t res = arg0 & arg1;
1645  return static_cast<bfn_t>(res);
1646 }
1648 static constexpr bfn_t operator^(bfn_t x, bfn_t y) {
1649  uint8_t arg0 = static_cast<uint8_t>(x);
1650  uint8_t arg1 = static_cast<uint8_t>(y);
1651  uint8_t res = arg0 ^ arg1;
1652  return static_cast<bfn_t>(res);
1653 }
1654 
1663 template <bfn_t FuncControl, typename T, int N>
1664 __ESIMD_API std::enable_if_t<std::is_integral_v<T>, __ESIMD_NS::simd<T, N>>
1665 bfn(__ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
1666  __ESIMD_NS::simd<T, N> src2) {
1667  if constexpr ((sizeof(T) == 8) || ((sizeof(T) == 1) && (N % 4 == 0)) ||
1668  ((sizeof(T) == 2) && (N % 2 == 0))) {
1669  // Bitcast Nx8-byte vectors to 2xN vectors of 4-byte integers.
1670  // Bitcast Nx1-byte vectors to N/4 vectors of 4-byte integers.
1671  // Bitcast Nx2-byte vectors to N/2 vectors of 4-byte integers.
1672  auto Result = __ESIMD_NS::bfn<FuncControl>(
1673  src0.template bit_cast_view<int32_t>().read(),
1674  src1.template bit_cast_view<int32_t>().read(),
1675  src2.template bit_cast_view<int32_t>().read());
1676  return Result.template bit_cast_view<T>();
1677  } else if constexpr (sizeof(T) == 2 || sizeof(T) == 4) {
1678  constexpr uint8_t FC = static_cast<uint8_t>(FuncControl);
1679  return __esimd_bfn<FC, T, N>(src0.data(), src1.data(), src2.data());
1680  } else if constexpr (N % 2 == 0) {
1681  // Bitcast Nx1-byte vectors (N is even) to N/2 vectors of 2-byte integers.
1682  auto Result = __ESIMD_NS::bfn<FuncControl>(
1683  src0.template bit_cast_view<int16_t>().read(),
1684  src1.template bit_cast_view<int16_t>().read(),
1685  src2.template bit_cast_view<int16_t>().read());
1686  return Result.template bit_cast_view<T>();
1687  } else {
1688  // Odd number of 1-byte elements.
1689  __ESIMD_NS::simd<T, N + 1> Src0, Src1, Src2;
1690  Src0.template select<N, 1>() = src0;
1691  Src1.template select<N, 1>() = src1;
1692  Src2.template select<N, 1>() = src2;
1693  auto Result = __ESIMD_NS::bfn<FuncControl>(Src0, Src1, Src2);
1694  return Result.template select<N, 1>();
1695  }
1696 }
1697 
1705 template <bfn_t FuncControl, typename T>
1706 ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
1707  __ESIMD_DNS::is_esimd_scalar<T>::value && std::is_integral_v<T>, T>
1708 bfn(T src0, T src1, T src2) {
1709  __ESIMD_NS::simd<T, 1> Src0 = src0;
1710  __ESIMD_NS::simd<T, 1> Src1 = src1;
1711  __ESIMD_NS::simd<T, 1> Src2 = src2;
1712  __ESIMD_NS::simd<T, 1> Result =
1713  esimd::bfn<FuncControl, T, 1>(Src0, Src1, Src2);
1714  return Result[0];
1715 }
1716 
1718 
1725 template <int N>
1726 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1727 addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
1728  __ESIMD_NS::simd<uint32_t, N> src1) {
1729  std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
1730  __ESIMD_DNS::vector_type_t<uint32_t, N>>
1731  Result = __esimd_addc<uint32_t, N>(src0.data(), src1.data());
1732 
1733  carry = Result.first;
1734  return Result.second;
1735 }
1736 
1743 template <int N>
1744 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1745 addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
1746  uint32_t src1) {
1747  __ESIMD_NS::simd<uint32_t, N> Src1V = src1;
1748  return addc(carry, src0, Src1V);
1749 }
1750 
1757 template <int N>
1758 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1759 addc(__ESIMD_NS::simd<uint32_t, N> &carry, uint32_t src0,
1760  __ESIMD_NS::simd<uint32_t, N> src1) {
1761  __ESIMD_NS::simd<uint32_t, N> Src0V = src0;
1762  return addc(carry, Src0V, src1);
1763 }
1764 
1771 __ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1) {
1772  __ESIMD_NS::simd<uint32_t, 1> CarryV = carry;
1773  __ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
1774  __ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
1775  __ESIMD_NS::simd<uint32_t, 1> Res = addc(CarryV, Src0V, Src1V);
1776  carry = CarryV[0];
1777  return Res[0];
1778 }
1779 
1787 template <int N>
1788 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1789 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
1790  __ESIMD_NS::simd<uint32_t, N> src1) {
1791  std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
1792  __ESIMD_DNS::vector_type_t<uint32_t, N>>
1793  Result = __esimd_subb<uint32_t, N>(src0.data(), src1.data());
1794 
1795  borrow = Result.first;
1796  return Result.second;
1797 }
1798 
1806 template <int N>
1807 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1808 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
1809  uint32_t src1) {
1810  __ESIMD_NS::simd<uint32_t, N> Src1V = src1;
1811  return subb(borrow, src0, Src1V);
1812 }
1813 
1821 template <int N>
1822 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1823 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, uint32_t src0,
1824  __ESIMD_NS::simd<uint32_t, N> src1) {
1825  __ESIMD_NS::simd<uint32_t, N> Src0V = src0;
1826  return subb(borrow, Src0V, src1);
1827 }
1828 
1836 __ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1) {
1837  __ESIMD_NS::simd<uint32_t, 1> BorrowV = borrow;
1838  __ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
1839  __ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
1840  __ESIMD_NS::simd<uint32_t, 1> Res = subb(BorrowV, Src0V, Src1V);
1841  borrow = BorrowV[0];
1842  return Res[0];
1843 }
1844 
1847 __ESIMD_API uint64_t rdtsc() {
1848 #ifdef __SYCL_DEVICE_ONLY__
1849  return __spirv_ReadClockKHR(0);
1850 #else
1851  __ESIMD_UNSUPPORTED_ON_HOST;
1852 #endif
1853 }
1854 
1862 template <typename T, int N>
1863 __ESIMD_API __ESIMD_NS::simd<T, N> clamp(__ESIMD_NS::simd<T, N> src,
1864  __ESIMD_NS::simd<T, N> min_val,
1865  __ESIMD_NS::simd<T, N> max_val) {
1866  __ESIMD_NS::simd<T, N> Result = src;
1867  Result.merge(min_val, src < min_val);
1868  Result.merge(max_val, src > max_val);
1869  return Result;
1870 }
1871 
1880 template <typename T, int N>
1881 __ESIMD_API __ESIMD_NS::simd<T, N> clamp(__ESIMD_NS::simd<T, N> src, T min_val,
1882  T max_val) {
1883  __ESIMD_NS::simd<T, N> MinVal = min_val;
1884  __ESIMD_NS::simd<T, N> MaxVal = max_val;
1885  return clamp(src, MinVal, MaxVal);
1886 }
1887 
1889 
1890 } // namespace ext::intel::esimd
1891 } // namespace _V1
1892 } // namespace sycl
const auto & data() const noexcept
Definition: simd.hpp:1673
Definition: simd.hpp:1387
This class represents a reference to a sub-region of a base simd object.
Definition: simd_view.hpp:37
typename ShapeTy::element_type element_type
The element type of this class, which could be different from the element type of the base object typ...
Definition: simd_view.hpp:64
The main simd vector class.
Definition: simd.hpp:53
__ESIMD_API std::enable_if_t< detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), simd< T0, SZ > > ror(simd< T1, SZ > src0, simd< T1, SZ > src1)
Rotate right operation with two vector inputs.
Definition: math.hpp:1336
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > asr(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Arithmetical Shift Right (vector version)
Definition: math.hpp:1134
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > lsr(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Logical Shift Right (vector version)
Definition: math.hpp:1061
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), uint > pack_mask(simd_mask< N > src0)
Pack a simd_mask into a single unsigned 32-bit integer value.
Definition: math.hpp:755
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > shr(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Shift right operation (vector version)
Definition: math.hpp:1205
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&(sizeof(T)==4), simd< T, N > > fbl(simd< T, N > src)
Find the per element number of the first bit set in the source operand starting from the least signif...
Definition: math.hpp:856
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > shl(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Shift left operation (vector version)
Definition: math.hpp:961
__ESIMD_API std::enable_if_t<(std::is_same_v< T, ushort >||std::is_same_v< T, uint >) &&(N > 0 &&N<=32), uint > ballot(simd< T, N > mask)
Compare source vector elements against zero and return a bitfield combining the comparison result.
Definition: math.hpp:794
__ESIMD_API std::enable_if_t< detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), simd< T0, SZ > > rol(simd< T1, SZ > src0, simd< T1, SZ > src1)
Rotate left operation with two vector inputs.
Definition: math.hpp:1271
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< std::is_integral< T >::value &&(sizeof(T)<=4), simd< uint32_t, N > > cbit(simd< T, N > src)
Count number of bits set in the source operand per element.
Definition: math.hpp:813
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_signed< T >::value &&(sizeof(T)==4), simd< T, N > > fbh(simd< T, N > src)
Find the per element number of the first bit set in the source operand starting from the most signifi...
Definition: math.hpp:900
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), simd_mask< N > > unpack_mask(uint src0)
Unpack an unsigned 32-bit integer value into a simd_mask.
Definition: math.hpp:769
#define __ESIMD_INTRINSIC_DEF(name)
Definition: math.hpp:619
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndu(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-up (also known as ceil).
Definition: math.hpp:663
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndd(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-down (also known as floor).
Definition: math.hpp:654
__ESIMD_API sycl::ext::intel::esimd::simd< RT, SZ > trunc(const sycl::ext::intel::esimd::simd< float, SZ > &src0, Sat sat={})
Round to integral value using the round to zero rounding mode (vector version).
Definition: math.hpp:723
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > floor(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Floor" operation, vector version - alias of rndd.
Definition: math.hpp:690
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rnde(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-to-even (also known as round).
Definition: math.hpp:671
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > ceil(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Ceiling" operation, vector version - alias of rndu.
Definition: math.hpp:703
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndz(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-to-zero (also known as trunc).
Definition: math.hpp:679
__ESIMD_API std::enable_if_t< std::is_integral_v< T >, sycl::ext::intel::esimd::simd< T, N > > bfn(sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd< T, N > src2)
Performs binary function computation with three vector operands.
Definition: math.hpp:1664
static constexpr bfn_t operator&(bfn_t x, bfn_t y)
Definition: math.hpp:1640
bfn_t
This enum is used to encode all possible logical operations performed on the 3 input operands.
Definition: math.hpp:1625
static constexpr bfn_t operator|(bfn_t x, bfn_t y)
Definition: math.hpp:1633
static constexpr bfn_t operator~(bfn_t x)
Definition: math.hpp:1627
static constexpr bfn_t operator^(bfn_t x, bfn_t y)
Definition: math.hpp:1647
__ESIMD_API simd< T, N > cos(simd< T, N > src, Sat sat={})
Cosine.
Definition: math.hpp:451
__ESIMD_API simd< T, N > sqrt(simd< T, N > src, Sat sat={})
Square root.
Definition: math.hpp:417
__ESIMD_API simd< T, N > rsqrt(simd< T, N > src, Sat sat={})
Square root reciprocal - calculates 1/sqrt(x).
Definition: math.hpp:443
#define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:380
__ESIMD_API simd< T, N > pow(simd< T, N > src0, simd< U, N > src1, Sat sat={})
Power - calculates src0 in power of src1.
Definition: math.hpp:513
#define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:479
__ESIMD_API simd< T, N > inv(simd< T, N > src, Sat sat={})
Inversion - calculates (1/x).
Definition: math.hpp:403
__ESIMD_API simd< T, N > sqrt_ieee(simd< T, N > src, Sat sat={})
IEEE754-compliant square root. Supports float and double.
Definition: math.hpp:422
#define __ESIMD_EMATH_SPIRV_COND
Definition: math.hpp:398
__ESIMD_API simd< T, N > div_ieee(simd< T, N > src0, simd< U, N > src1, Sat sat={})
IEEE754-compliant floating-point division. Supports float and double.
Definition: math.hpp:554
__ESIMD_API simd< T, N > sin(simd< T, N > src, Sat sat={})
Sine.
Definition: math.hpp:447
#define __ESIMD_EMATH_IEEE_COND
Definition: math.hpp:395
__ESIMD_API simd< T, N > log2(simd< T, N > src, Sat sat={})
Logarithm base 2.
Definition: math.hpp:409
__ESIMD_API simd< T, N > exp2(simd< T, N > src, Sat sat={})
Exponent base 2.
Definition: math.hpp:413
ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd< T1, SZ > v)
ESIMD_DETAIL.
Definition: math.hpp:1573
__ESIMD_API SZ simd< T, SZ > src1
Definition: math.hpp:187
__ESIMD_API simd< T, SZ >(max)(simd< T
Selects component-wise the maximum of the two vectors.
ESIMD_DETAIL ESIMD_NODEBUG ESIMD_INLINE simd< T, SZ > log(simd< T, SZ > src0, Sat sat={})
Computes the natural logarithm of the given argument.
Definition: math.hpp:579
__ESIMD_API SZ simd< T, SZ > Sat sat
Definition: math.hpp:187
ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd< T1, SZ > v, BinaryOperation op)
Performs reduction over elements of the input vector.
Definition: math.hpp:1606
__ESIMD_API SZ simd< T, SZ > Sat int class simd< T, SZ >() min(simd< T, SZ > src0, T src1, Sat sat={})
Definition: math.hpp:313
__ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v< T0 >||std::is_same_v< T1, T0 >, simd< T0, SZ > > saturate(simd< T1, SZ > src)
Conversion of input vector elements of type T1 into vector of elements of type T0 with saturation.
Definition: math.hpp:72
__ESIMD_API SZ simd< T, SZ > Sat int SZ
Definition: math.hpp:222
__ESIMD_API sycl::ext::intel::esimd::simd< uint32_t, N > addc(sycl::ext::intel::esimd::simd< uint32_t, N > &carry, sycl::ext::intel::esimd::simd< uint32_t, N > src0, sycl::ext::intel::esimd::simd< uint32_t, N > src1)
Performs add with carry of 2 unsigned 32-bit vectors.
Definition: math.hpp:1726
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > clamp(sycl::ext::intel::esimd::simd< T, N > src, sycl::ext::intel::esimd::simd< T, N > min_val, sycl::ext::intel::esimd::simd< T, N > max_val)
Performs clamping of values in a vector between min and max values.
Definition: math.hpp:1862
__ESIMD_API SZ simd< T, SZ > Sat int class simd< T, SZ >() max(simd< T, SZ > src0, T src1, Sat sat={})
Definition: math.hpp:224
__ESIMD_API uint64_t rdtsc()
rdtsc - get the value of timestamp counter.
Definition: math.hpp:1846
ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd< T1, SZ > v)
Performs 'minimum' operation reduction over elements of the input vector, that is,...
Definition: math.hpp:1586
ESIMD_NODEBUG ESIMD_INLINE simd< T, SZ > exp(simd< T, SZ > src0, Sat sat={})
Computes e raised to the power of the given argument.
Definition: math.hpp:600
__ESIMD_API SZ simd< T, SZ > Sat int class Sat
Definition: math.hpp:222
__ESIMD_API sycl::ext::intel::esimd::simd< uint32_t, N > subb(sycl::ext::intel::esimd::simd< uint32_t, N > &borrow, sycl::ext::intel::esimd::simd< uint32_t, N > src0, sycl::ext::intel::esimd::simd< uint32_t, N > src1)
Performs substraction with borrow of 2 unsigned 32-bit vectors.
Definition: math.hpp:1788
__ESIMD_API std::enable_if_t< detail::is_dword_type< T1 >::value &&detail::is_dword_type< T2 >::value &&detail::is_dword_type< T3 >::value &&detail::is_dword_type< T4 >::value, simd< T1, N > > dp4a(simd< T2, N > src0, simd< T3, N > src1, simd< T4, N > src2, Sat sat={})
DP4A.
Definition: math.hpp:1410
__ESIMD_API SZ src0
Definition: math.hpp:187
ESIMD_DETAIL __ESIMD_API std::enable_if_t< !std::is_same< std::remove_const_t< TRes >, std::remove_const_t< TArg > >::value, simd< TRes, SZ > > abs(simd< TArg, SZ > src0)
Get absolute value (vector version)
Definition: math.hpp:134
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:51
T0 reduce(simd< T1, SZ > v)
Definition: math.hpp:1533
ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd< T1, SZ > v)
Definition: math.hpp:1556
T0 reduce_single(simd< T1, SZ > v)
Definition: math.hpp:1493
ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd< T1, SZ > v)
Definition: math.hpp:1548
T0 reduce_pair(simd< T1, N1 > v1, simd< T1, N2 > v2)
Definition: math.hpp:1509
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
Definition: common.hpp:96
autodecltype(x) x
Definition: access.hpp:18
sycl::half min(sycl::half a, sycl::half b)
Definition: math.hpp:717
sycl::half max(sycl::half a, sycl::half b)
Definition: math.hpp:735
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1470
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1477
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1485
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1463
This type tag represents "saturation off" behavior.
Definition: common.hpp:50