DPC++ Runtime
Runtime libraries for oneAPI DPC++
math.hpp
Go to the documentation of this file.
1 //==-------------- math.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement Explicit SIMD math APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
21 
22 #include <cstdint>
23 
24 namespace sycl {
25 inline namespace _V1 {
26 namespace ext::intel::esimd {
27 
40 
43 
68 template <typename T0, typename T1, int SZ>
69 __ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v<T0> ||
70  std::is_same_v<T1, T0>,
73  if constexpr (detail::is_generic_floating_point_v<T0>)
74  return __esimd_sat<T0, T1, SZ>(src.data());
75  else if constexpr (detail::is_generic_floating_point_v<T1>) {
76  if constexpr (std::is_unsigned_v<T0>)
77  return __esimd_fptoui_sat<T0, T1, SZ>(src.data());
78  else
79  return __esimd_fptosi_sat<T0, T1, SZ>(src.data());
80  } else if constexpr (std::is_unsigned_v<T0>) {
81  if constexpr (std::is_unsigned_v<T1>)
82  return __esimd_uutrunc_sat<T0, T1, SZ>(src.data());
83  else
84  return __esimd_ustrunc_sat<T0, T1, SZ>(src.data());
85  } else {
86  if constexpr (std::is_signed_v<T1>)
87  return __esimd_sstrunc_sat<T0, T1, SZ>(src.data());
88  else
89  return __esimd_sutrunc_sat<T0, T1, SZ>(src.data());
90  }
91 }
92 
94 // abs
95 namespace detail {
96 
97 template <typename TRes, typename TArg, int SZ>
98 ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
99 __esimd_abs_common_internal(simd<TArg, SZ> src0) {
100  simd<TArg, SZ> Result = simd<TArg, SZ>(__esimd_abs<TArg, SZ>(src0.data()));
101  return convert<TRes>(Result);
102 }
103 
104 template <typename TRes, typename TArg>
105 
106 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<TRes>::value &&
107  detail::is_esimd_scalar<TArg>::value,
108  TRes>
109 __esimd_abs_common_internal(TArg src0) {
110  simd<TArg, 1> Src0 = src0;
111  simd<TArg, 1> Result = __esimd_abs_common_internal<TArg>(Src0);
112  return convert<TRes>(Result)[0];
113 }
114 } // namespace detail
116 
123 template <typename TRes, typename TArg, int SZ>
124 __ESIMD_API std::enable_if_t<
125  !std::is_same<std::remove_const_t<TRes>, std::remove_const_t<TArg>>::value,
128  return detail::__esimd_abs_common_internal<TRes, TArg, SZ>(src0.data());
129 }
130 
136 template <typename TRes, typename TArg>
137 __ESIMD_API std::enable_if_t<!std::is_same<std::remove_const_t<TRes>,
138  std::remove_const_t<TArg>>::value &&
139  detail::is_esimd_scalar<TRes>::value &&
140  detail::is_esimd_scalar<TArg>::value,
141  std::remove_const_t<TRes>>
142 abs(TArg src0) {
143  return detail::__esimd_abs_common_internal<TRes, TArg>(src0);
144 }
145 
153 template <typename T1, int SZ> __ESIMD_API simd<T1, SZ> abs(simd<T1, SZ> src0) {
154  return detail::__esimd_abs_common_internal<T1, T1, SZ>(src0.data());
155 }
156 
163 template <typename T1>
164 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T1>::value,
165  std::remove_const_t<T1>>
166 abs(T1 src0) {
167  return detail::__esimd_abs_common_internal<T1, T1>(src0);
168 }
169 
179 template <typename T, int SZ, class Sat = saturation_off_tag>
181  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
182 
183  if constexpr (std::is_floating_point<T>::value) {
184  auto Result = __esimd_fmax<T, SZ>(src0.data(), src1.data());
185  if constexpr (is_sat)
186  Result = __esimd_sat<T, T, SZ>(Result);
187  return simd<T, SZ>(Result);
188  } else if constexpr (std::is_unsigned<T>::value) {
189  auto Result = __esimd_umax<T, SZ>(src0.data(), src1.data());
190  if constexpr (is_sat)
191  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
192  return simd<T, SZ>(Result);
193  } else {
194  auto Result = __esimd_smax<T, SZ>(src0.data(), src1.data());
195  if constexpr (is_sat)
196  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
197  return simd<T, SZ>(Result);
198  }
199 }
200 
211 template <typename T, int SZ, class Sat = saturation_off_tag>
212 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
213  max)(simd<T, SZ> src0, T src1, Sat sat = {}) {
214  simd<T, SZ> Src1 = src1;
215  simd<T, SZ> Result = (esimd::max)(src0, Src1, sat);
216  return Result;
217 }
218 
229 template <typename T, int SZ, class Sat = saturation_off_tag>
230 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
231  max)(T src0, simd<T, SZ> src1, Sat sat = {}) {
232  simd<T, SZ> Src0 = src0;
233  simd<T, SZ> Result = (esimd::max)(Src0, src1, sat);
234  return Result;
235 }
236 
245 template <typename T, class Sat = saturation_off_tag>
246 ESIMD_NODEBUG ESIMD_INLINE
247 std::enable_if_t<detail::is_esimd_scalar<T>::value, T>(max)(T src0, T src1,
248  Sat sat = {}) {
249  simd<T, 1> Src0 = src0;
250  simd<T, 1> Src1 = src1;
251  simd<T, 1> Result = (esimd::max)(Src0, Src1, sat);
252  return Result[0];
253 }
254 
264 template <typename T, int SZ, class Sat = saturation_off_tag>
266  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
267 
268  if constexpr (std::is_floating_point<T>::value) {
269  auto Result = __esimd_fmin<T, SZ>(src0.data(), src1.data());
270  if constexpr (is_sat)
271  Result = __esimd_sat<T, T, SZ>(Result);
272  return simd<T, SZ>(Result);
273  } else if constexpr (std::is_unsigned<T>::value) {
274  auto Result = __esimd_umin<T, SZ>(src0.data(), src1.data());
275  if constexpr (is_sat)
276  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
277  return simd<T, SZ>(Result);
278  } else {
279  auto Result = __esimd_smin<T, SZ>(src0.data(), src1.data());
280  if constexpr (is_sat)
281  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
282  return simd<T, SZ>(Result);
283  }
284 }
285 
296 template <typename T, int SZ, class Sat = saturation_off_tag>
297 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
298  min)(simd<T, SZ> src0, T src1, Sat sat = {}) {
299  simd<T, SZ> Src1 = src1;
300  simd<T, SZ> Result = (esimd::min)(src0, Src1, sat);
301  return Result;
302 }
303 
314 template <typename T, int SZ, class Sat = saturation_off_tag>
315 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>(
316  min)(T src0, simd<T, SZ> src1, Sat sat = {}) {
317  simd<T, SZ> Src0 = src0;
318  simd<T, SZ> Result = (esimd::min)(Src0, src1, sat);
319  return Result;
320 }
321 
330 template <typename T, class Sat = saturation_off_tag>
331 ESIMD_NODEBUG ESIMD_INLINE
332 std::enable_if_t<detail::is_esimd_scalar<T>::value, T>(min)(T src0, T src1,
333  Sat sat = {}) {
334  simd<T, 1> Src0 = src0;
335  simd<T, 1> Src1 = src1;
336  simd<T, 1> Result = (esimd::min)(Src0, Src1, sat);
337  return Result[0];
338 }
339 
341 
344 
345 #if defined(__SYCL_DEVICE_ONLY__)
346 #define __ESIMD_VECTOR_IMPL(T, name, iname) \
347  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res = \
348  __spirv_ocl_native_##iname<__ESIMD_DNS::__raw_t<T>, N>(src.data()); \
349  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
350  return res; \
351  else \
352  return esimd::saturate<T>(simd<T, N>(res));
353 #define __ESIMD_SCALAR_IMPL(T, name, iname) \
354  __ESIMD_DNS::__raw_t<T> res = \
355  __spirv_ocl_native_##iname<__ESIMD_DNS::__raw_t<T>>(src); \
356  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
357  return res; \
358  else \
359  return esimd::saturate<T>(simd<T, 1>(res))[0];
360 #else
361 #define __ESIMD_VECTOR_IMPL(T, name, iname) return 0;
362 #define __ESIMD_SCALAR_IMPL(T, name, iname) return 0;
363 #endif // __SYCL_DEVICE_ONLY__
364 
365 #define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname) \
366  \
367  template <class T, int N, class Sat = saturation_off_tag, \
368  class = std::enable_if_t<COND>> \
369  __ESIMD_API simd<T, N> name(simd<T, N> src, Sat sat = {}) { \
370  __ESIMD_VECTOR_IMPL(T, name, iname) \
371  } \
372  \
373  \
374  template <typename T, class Sat = saturation_off_tag, \
375  class = std::enable_if_t<COND>> \
376  __ESIMD_API T name(T src, Sat sat = {}) { \
377  __ESIMD_SCALAR_IMPL(T, name, iname) \
378  }
379 
380 #define __ESIMD_EMATH_IEEE_COND \
381  detail::is_generic_floating_point_v<T> && (sizeof(T) >= 4)
382 
383 #define __ESIMD_EMATH_SPIRV_COND \
384  std::is_same_v<T, float> || std::is_same_v<T, sycl::half>
385 
388 __ESIMD_UNARY_INTRINSIC_DEF(detail::is_generic_floating_point_v<T>, inv, recip)
389 
395 
399 
400 __ESIMD_UNARY_INTRINSIC_DEF(detail::is_generic_floating_point_v<T>, sqrt, sqrt)
403 
404 template <class T, int N, class Sat = saturation_off_tag,
406  class = std::enable_if_t<__ESIMD_EMATH_IEEE_COND>>
407 __ESIMD_API simd<T, N> sqrt_ieee(simd<T, N> src, Sat sat = {}) {
408  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res =
409  __esimd_ieee_sqrt<T, N>(src.data());
410  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
411  return res;
412  else
413  return esimd::saturate<T>(simd<T, N>(res));
414 }
415 
417 template <typename T, class Sat = saturation_off_tag,
418  class = std::enable_if_t<__ESIMD_EMATH_IEEE_COND>>
419 __ESIMD_API T sqrt_ieee(T src, Sat sat = {}) {
420  simd<T, 1> src_vec = src;
421  simd<T, 1> res = sqrt_ieee<T, 1>(src_vec, sat);
422  return res[0];
423 }
424 
429 
433 
437 
438 template <class T, int N, class Sat = saturation_off_tag>
442 __ESIMD_API std::enable_if_t<std::is_same_v<T, double>, simd<double, N>>
443 rsqrt(simd<T, N> src, Sat sat = {}) {
444  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
445  return inv(sqrt(src));
446  else
447  return esimd::saturate<double>(inv(sqrt(src)));
448 }
449 
451 template <class T, class Sat = saturation_off_tag>
452 __ESIMD_API std::enable_if_t<std::is_same_v<T, double>, double>
453 rsqrt(T src, Sat sat = {}) {
454  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
455  return inv(sqrt(src));
456  else
457  return esimd::saturate<double>(inv(sqrt(src)));
458 }
459 
460 #undef __ESIMD_UNARY_INTRINSIC_DEF
461 #undef __ESIMD_VECTOR_IMPL
462 #undef __ESIMD_SCALAR_IMPL
463 
464 #define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname) \
465  \
466  template <class T, int N, class U, class Sat = saturation_off_tag, \
467  class = std::enable_if_t<COND>> \
468  __ESIMD_API simd<T, N> name(simd<T, N> src0, simd<U, N> src1, \
469  Sat sat = {}) { \
470  using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>; \
471  RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data()); \
472  RawVecT res_raw = __esimd_##iname<T, N>(src0.data(), src1_raw_conv); \
473  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
474  return res_raw; \
475  else \
476  return esimd::saturate<T>(simd<T, N>(res_raw)); \
477  } \
478  \
479  \
480  template <class T, int N, class U, class Sat = saturation_off_tag, \
481  class = std::enable_if_t<COND>> \
482  __ESIMD_API simd<T, N> name(simd<T, N> src0, U src1, Sat sat = {}) { \
483  return name<T, N, U>(src0, simd<U, N>(src1), sat); \
484  } \
485  \
486  \
487  template <class T, class U, class Sat = saturation_off_tag, \
488  class = std::enable_if_t<COND>> \
489  __ESIMD_API T name(T src0, U src1, Sat sat = {}) { \
490  simd<T, 1> res = name<T, 1, U>(simd<T, 1>(src0), simd<U, 1>(src1), sat); \
491  return res[0]; \
492  }
493 
496 template <class T, int N, class U, class Sat = saturation_off_tag,
497  class = std::enable_if_t<__ESIMD_EMATH_SPIRV_COND>>
499 #if defined(__SYCL_DEVICE_ONLY__)
500  using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>;
501  RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data());
502  RawVecT res_raw = __spirv_ocl_native_powr<__ESIMD_DNS::__raw_t<T>, N>(
503  src0.data(), src1_raw_conv);
504  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
505  return res_raw;
506  else
507  return esimd::saturate<T>(simd<T, N>(res_raw));
508 #else
509  return 0;
510 #endif // __SYCL_DEVICE_ONLY__
511 }
512 
514 template <class T, int N, class U, class Sat = saturation_off_tag,
515  class = std::enable_if_t<__ESIMD_EMATH_SPIRV_COND>>
516 __ESIMD_API simd<T, N> pow(simd<T, N> src0, U src1, Sat sat = {}) {
517  return pow<T, N, U>(src0, simd<U, N>(src1), sat);
518 }
519 
521 template <class T, class U, class Sat = saturation_off_tag,
522  class = std::enable_if_t<__ESIMD_EMATH_SPIRV_COND>>
523 __ESIMD_API T pow(T src0, U src1, Sat sat = {}) {
524 #if defined(__SYCL_DEVICE_ONLY__)
525  using ResT = __ESIMD_DNS::__raw_t<T>;
526  ResT src1_raw_conv = detail::convert_scalar<T, U>(src1);
527  ResT res_raw =
528  __spirv_ocl_native_powr<__ESIMD_DNS::__raw_t<T>>(src0, src1_raw_conv);
529  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
530  return res_raw;
531  else
532  return esimd::saturate<T>(simd<T, 1>(res_raw))[0];
533 #else
534  return 0;
535 #endif // __SYCL_DEVICE_ONLY__
536 }
537 
540 
541 #undef __ESIMD_BINARY_INTRINSIC_DEF
542 #undef __ESIMD_EMATH_IEEE_COND
543 #undef __ESIMD_EMATH_SPIRV_COND
544 
546 
549 
551 namespace detail {
552 // std::numbers::ln2_v<float> in c++20
553 constexpr float ln2 = 0.69314718f;
554 // std::numbers::log2e_v<float> in c++20
555 constexpr float log2e = 1.442695f;
556 } // namespace detail
558 
563 template <class T, int SZ, class Sat = saturation_off_tag>
564 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> log(simd<T, SZ> src0, Sat sat = {}) {
565  using CppT = __ESIMD_DNS::__cpp_t<T>;
566  simd<T, SZ> Result =
567  esimd::log2<T, SZ, saturation_off_tag>(src0) * detail::ln2;
568 
569  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
570  return Result;
571  else
572  return esimd::saturate<T>(Result);
573 }
574 
575 template <class T, class Sat = saturation_off_tag>
576 ESIMD_NODEBUG ESIMD_INLINE T log(T src0, Sat sat = {}) {
577  return esimd::log<T, 1>(src0, sat)[0];
578 }
579 
584 template <class T, int SZ, class Sat = saturation_off_tag>
585 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> exp(simd<T, SZ> src0, Sat sat = {}) {
586  using CppT = __ESIMD_DNS::__cpp_t<T>;
587  return esimd::exp2<T, SZ>(src0 * detail::log2e, sat);
588 }
589 
590 template <class T, class Sat = saturation_off_tag>
591 ESIMD_NODEBUG ESIMD_INLINE T exp(T src0, Sat sat = {}) {
592  return esimd::exp<T, 1>(src0, sat)[0];
593 }
594 
596 
599 
601 // Rounding intrinsics.
603 
604 #define __ESIMD_INTRINSIC_DEF(name) \
605  \
606  \
607  \
609  \
610  \
611  \
612  template <typename T, int SZ, class Sat = __ESIMD_NS::saturation_off_tag> \
613  __ESIMD_API __ESIMD_NS::simd<T, SZ> name(__ESIMD_NS::simd<float, SZ> src0, \
614  Sat sat = {}) { \
615  __ESIMD_NS::simd<float, SZ> Result = __esimd_##name<SZ>(src0.data()); \
616  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>) \
617  return Result; \
618  else if constexpr (!std::is_same_v<float, T>) { \
619  auto RawRes = __ESIMD_NS::saturate<float>(Result).data(); \
620  return __ESIMD_DNS::convert_vector<T, float, SZ>(std::move(RawRes)); \
621  } else { \
622  return __ESIMD_NS::saturate<T>(Result); \
623  } \
624  } \
625  \
626  template <typename T, class Sat = __ESIMD_NS::saturation_off_tag> \
627  __ESIMD_API T name(float src0, Sat sat = {}) { \
628  __ESIMD_NS::simd<float, 1> Src0 = src0; \
629  __ESIMD_NS::simd<T, 1> Result = name<T>(Src0, sat); \
630  return Result[0]; \
631  }
632 
641 
650 
658 
666 
667 #undef __ESIMD_INTRINSIC_DEF
669 
672 
674 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
675 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
676 floor(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
677  return esimd::rndd<RT, SZ>(src0, sat);
678 }
679 
681 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
682 ESIMD_INLINE RT floor(float src0, Sat sat = {}) {
683  return esimd::rndd<RT, 1U>(src0, sat)[0];
684 }
685 
687 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
688 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
689 ceil(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
690  return esimd::rndu<RT, SZ>(src0, sat);
691 }
692 
694 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
695 ESIMD_INLINE RT ceil(float src0, Sat sat = {}) {
696  return esimd::rndu<RT, 1U>(src0, sat);
697 }
698 
707 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
708 __ESIMD_API __ESIMD_NS::simd<RT, SZ>
709 trunc(const __ESIMD_NS::simd<float, SZ> &src0, Sat sat = {}) {
710  return esimd::rndz<RT, SZ>(src0, sat);
711 }
712 
720 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
721 __ESIMD_API RT trunc(float src0, Sat sat = {}) {
722  return esimd::rndz<RT, 1U>(src0, sat)[0];
723 }
724 
726 
729 
738 template <int N>
739 ESIMD_NODEBUG
740  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), uint>
742  return __esimd_pack_mask<N>(src0.data());
743 }
744 
752 template <int N>
753 ESIMD_NODEBUG
754  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), simd_mask<N>>
756  return __esimd_unpack_mask<N>(src0);
757 }
758 
761 template <int N>
762 __ESIMD_API std::enable_if_t<(N != 8 && N != 16 && N < 32), uint>
764  simd_mask<(N < 8 ? 8 : N < 16 ? 16 : 32)> src_0 = 0;
765  src_0.template select<N, 1>() = src0.template bit_cast_view<ushort>();
766  return esimd::pack_mask(src_0);
767 }
768 
775 template <typename T, int N>
776 __ESIMD_API
777  std::enable_if_t<(std::is_same_v<T, ushort> || std::is_same_v<T, uint>) &&
778  (N > 0 && N <= 32),
780  ballot(simd<T, N> mask) {
781  simd_mask<N> cmp = (mask != 0);
782  if constexpr (N == 8 || N == 16 || N == 32) {
783  return __esimd_pack_mask<N>(cmp.data());
784  } else {
785  constexpr int N1 = (N <= 8 ? 8 : N <= 16 ? 16 : 32);
786  simd<uint16_t, N1> res = 0;
787  res.template select<N, 1>() = cmp.data();
788  return __esimd_pack_mask<N1>(res.data());
789  }
790 }
791 
796 template <typename T, int N>
797 ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
798  std::is_integral<T>::value && (sizeof(T) <= 4), simd<uint32_t, N>>
799 cbit(simd<T, N> src) {
800  return __esimd_cbit<T, N>(src.data());
801 }
802 
805 template <typename T>
806 __ESIMD_API
807  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) <= 4), uint32_t>
808  cbit(T src) {
809  simd<T, 1> Src = src;
810  simd<uint32_t, 1> Result = esimd::cbit(Src);
811  return Result[0];
812 }
813 
818 template <typename BaseTy, typename RegionTy>
819 __ESIMD_API std::enable_if_t<
820  std::is_integral<
822  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) <= 4) &&
824  uint32_t>
826  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
827  simd<Ty, 1> Src = src;
828  simd<uint32_t, 1> Result = esimd::cbit(Src);
829  return Result[0];
830 }
831 
839 template <typename T, int N>
840 __ESIMD_API
841  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), simd<T, N>>
842  fbl(simd<T, N> src) {
843  return __esimd_fbl<T, N>(src.data());
844 }
845 
848 template <typename T>
849 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
850 fbl(T src) {
851  simd<T, 1> Src = src;
852  simd<T, 1> Result = esimd::fbl(Src);
853  return Result[0];
854 }
855 
861 template <typename BaseTy, typename RegionTy>
862 __ESIMD_API std::enable_if_t<
863  std::is_integral<
865  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
869  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
870  simd<Ty, 1> Src = src;
871  simd<Ty, 1> Result = esimd::fbl(Src);
872  return Result[0];
873 }
874 
882 template <typename T, int N>
883 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
884  std::is_signed<T>::value && (sizeof(T) == 4),
886 fbh(simd<T, N> src) {
887  return __esimd_sfbh<T, N>(src.data());
888 }
889 
896 template <typename T, int N>
897 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
898  !std::is_signed<T>::value && (sizeof(T) == 4),
900 fbh(simd<T, N> src) {
901  return __esimd_ufbh<T, N>(src.data());
902 }
903 
906 template <typename T>
907 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
908 fbh(T src) {
909  simd<T, 1> Src = src;
910  simd<T, 1> Result = esimd::fbh(Src);
911  return Result[0];
912 }
913 
919 template <typename BaseTy, typename RegionTy>
920 __ESIMD_API std::enable_if_t<
921  std::is_integral<
923  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
927  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
928  simd<Ty, 1> Src = src;
929  simd<Ty, 1> Result = esimd::fbh(Src);
930  return Result[0];
931 }
932 
943 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
944 __ESIMD_API
945  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
948  using ComputationTy =
949  __ESIMD_DNS::computation_type_t<decltype(src0), int32_t>;
950  ComputationTy Src0 = src0;
951  ComputationTy Src1 = src1;
952 
953  if constexpr (std::is_same_v<Sat, saturation_on_tag>) {
954  if constexpr (std::is_unsigned<T0>::value) {
955  if constexpr (std::is_unsigned<
956  typename ComputationTy::element_type>::value)
957  return __esimd_uushl_sat<T0, typename ComputationTy::element_type, SZ>(
958  Src0.data(), Src1.data());
959  else
960  return __esimd_usshl_sat<T0, typename ComputationTy::element_type, SZ>(
961  Src0.data(), Src1.data());
962  } else {
963  if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
964  return __esimd_sushl_sat<T0, typename ComputationTy::element_type, SZ>(
965  Src0.data(), Src1.data());
966  else
967  return __esimd_ssshl_sat<T0, typename ComputationTy::element_type, SZ>(
968  Src0.data(), Src1.data());
969  }
970  } else {
971  if constexpr (std::is_unsigned<T0>::value) {
972  if constexpr (std::is_unsigned<
973  typename ComputationTy::element_type>::value)
974  return __esimd_uushl<T0, typename ComputationTy::element_type, SZ>(
975  Src0.data(), Src1.data());
976  else
977  return __esimd_usshl<T0, typename ComputationTy::element_type, SZ>(
978  Src0.data(), Src1.data());
979  } else {
980  if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
981  return __esimd_sushl<T0, typename ComputationTy::element_type, SZ>(
982  Src0.data(), Src1.data());
983  else
984  return __esimd_ssshl<T0, typename ComputationTy::element_type, SZ>(
985  Src0.data(), Src1.data());
986  }
987  }
988 }
989 
999 template <typename T0, typename T1, int SZ, typename U,
1000  class Sat = saturation_off_tag>
1001 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1002  std::is_integral<T1>::value &&
1003  std::is_integral<U>::value,
1005 shl(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1006  simd<U, SZ> Src1 = src1;
1007  return shl<T0, T1, SZ>(src0, Src1, sat);
1008 }
1009 
1019 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1020 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1021  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1022  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1023  std::is_integral<T0>::value &&
1024  std::is_integral<T1>::value &&
1025  std::is_integral<T2>::value,
1026  std::remove_const_t<T0>>
1027 shl(T1 src0, T2 src1, Sat sat = {}) {
1028  simd<T1, 1> Src0 = src0;
1029  simd<T0, 1> Result = shl<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1030  return Result[0];
1031 }
1032 
1043 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
1044 __ESIMD_API
1045  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
1048  using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
1049  typedef typename std::make_unsigned<IntermedTy>::type ComputationTy;
1052  simd<ComputationTy, SZ> Result = Src0.data() >> Src1.data();
1053 
1054  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
1055  return Result;
1056  else
1057  return saturate<T0>(Result);
1058 }
1059 
1070 template <typename T0, typename T1, int SZ, typename U,
1071  class Sat = saturation_off_tag>
1072 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1073  std::is_integral<T1>::value &&
1074  std::is_integral<U>::value,
1076 lsr(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1077  simd<T1, SZ> Src1 = src1;
1078  return lsr<T0, T1, SZ>(src0, Src1, sat);
1079 }
1080 
1091 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1092 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1093  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1094  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1095  std::is_integral<T0>::value &&
1096  std::is_integral<T1>::value &&
1097  std::is_integral<T2>::value,
1098  std::remove_const_t<T0>>
1099 lsr(T1 src0, T2 src1, Sat sat = {}) {
1100  simd<T1, 1> Src0 = src0;
1101  simd<T0, 1> Result = lsr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1102 
1103  return Result[0];
1104 }
1105 
1116 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
1117 __ESIMD_API
1118  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
1121  using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
1122  typedef typename std::make_signed<IntermedTy>::type ComputationTy;
1125  simd<ComputationTy, SZ> Result = Src0 >> Src1;
1126  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
1127  return Result;
1128  else
1129  return saturate<T0>(Result);
1130 }
1131 
1142 template <typename T0, typename T1, int SZ, typename U,
1143  class Sat = saturation_off_tag>
1144 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1145  std::is_integral<T1>::value &&
1146  std::is_integral<U>::value,
1148 asr(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1149  simd<U, SZ> Src1 = src1;
1150  return asr<T0, T1, SZ>(src0, Src1, sat);
1151 }
1152 
1163 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1164 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1165  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1166  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1167  std::is_integral<T0>::value &&
1168  std::is_integral<T1>::value &&
1169  std::is_integral<T2>::value,
1170  std::remove_const_t<T0>>
1171 asr(T1 src0, T2 src1, Sat sat = {}) {
1172  simd<T1, 1> Src0 = src0;
1173  simd<T0, 1> Result = esimd::asr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1174  return Result[0];
1175 }
1176 
1187 template <typename T0, typename T1, int SZ, class Sat = saturation_off_tag>
1188 __ESIMD_API
1189  std::enable_if_t<std::is_integral<T0>::value && std::is_integral<T1>::value,
1192  if constexpr (std::is_unsigned<T1>::value) {
1193  return esimd::lsr<T0, T1, SZ>(src0, src1, sat);
1194  } else {
1195  return esimd::asr<T0, T1, SZ>(src0, src1, sat);
1196  }
1197 }
1198 
1209 template <typename T0, typename T1, int SZ, typename U,
1210  class Sat = saturation_off_tag>
1211 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
1212  std::is_integral<T1>::value &&
1213  std::is_integral<U>::value,
1215 shr(simd<T1, SZ> src0, U src1, Sat sat = {}) {
1216  simd<U, SZ> Src1 = src1;
1217  return shr<T0, T1, SZ>(src0, Src1, sat);
1218 }
1219 
1229 template <typename T0, typename T1, typename T2, class Sat = saturation_off_tag>
1230 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1231  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1232  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1233  std::is_integral<T0>::value &&
1234  std::is_integral<T1>::value &&
1235  std::is_integral<T2>::value,
1236  std::remove_const_t<T0>>
1237 shr(T1 src0, T2 src1, Sat sat = {}) {
1238  simd<T1, 1> Src0 = src0;
1239  simd<T0, 1> Result = shr<T0, T1, 1, T2, Sat>(Src0, src1, sat);
1240  return Result[0];
1241 }
1242 
1251 template <typename T0, typename T1, int SZ>
1252 __ESIMD_API std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t,
1253  uint32_t, int64_t, uint64_t>() &&
1254  detail::is_type<T1, int16_t, uint16_t, int32_t,
1255  uint32_t, int64_t, uint64_t>(),
1258  return __esimd_rol<T0, T1, SZ>(src0.data(), src1.data());
1259 }
1260 
1269 template <typename T0, typename T1, int SZ, typename U>
1270 __ESIMD_API
1271  std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
1272  int64_t, uint64_t>() &&
1273  detail::is_type<T1, int16_t, uint16_t, int32_t,
1274  uint32_t, int64_t, uint64_t>() &&
1275  detail::is_type<U, int16_t, uint16_t, int32_t,
1276  uint32_t, int64_t, uint64_t>(),
1278  rol(simd<T1, SZ> src0, U src1) {
1279  simd<T1, SZ> Src1 = src1;
1280  return rol<T0>(src0, Src1);
1281 }
1282 
1290 template <typename T0, typename T1, typename T2>
1291 __ESIMD_API
1292  std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1293  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1294  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1295  detail::is_type<T0, int16_t, uint16_t, int32_t,
1296  uint32_t, int64_t, uint64_t>() &&
1297  detail::is_type<T1, int16_t, uint16_t, int32_t,
1298  uint32_t, int64_t, uint64_t>() &&
1299  detail::is_type<T2, int16_t, uint16_t, int32_t,
1300  uint32_t, int64_t, uint64_t>(),
1301  std::remove_const_t<T0>>
1302  rol(T1 src0, T2 src1) {
1303  simd<T1, 1> Src0 = src0;
1304  simd<T0, 1> Result = rol<T0, T1, 1, T2>(Src0, src1);
1305  return Result[0];
1306 }
1307 
1316 template <typename T0, typename T1, int SZ>
1317 __ESIMD_API std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t,
1318  uint32_t, int64_t, uint64_t>() &&
1319  detail::is_type<T1, int16_t, uint16_t, int32_t,
1320  uint32_t, int64_t, uint64_t>(),
1323  return __esimd_ror<T0, T1, SZ>(src0.data(), src1.data());
1324 }
1325 
1334 template <typename T0, typename T1, int SZ, typename U>
1335 __ESIMD_API
1336  std::enable_if_t<detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
1337  int64_t, uint64_t>() &&
1338  detail::is_type<T1, int16_t, uint16_t, int32_t,
1339  uint32_t, int64_t, uint64_t>() &&
1340  detail::is_type<U, int16_t, uint16_t, int32_t,
1341  uint32_t, int64_t, uint64_t>(),
1343  ror(simd<T1, SZ> src0, U src1) {
1344  simd<T1, SZ> Src1 = src1;
1345  return esimd::ror<T0>(src0, Src1);
1346 }
1347 
1355 template <typename T0, typename T1, typename T2>
1356 __ESIMD_API
1357  std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1358  __ESIMD_DNS::is_esimd_scalar<T1>::value &&
1359  __ESIMD_DNS::is_esimd_scalar<T2>::value &&
1360  detail::is_type<T0, int16_t, uint16_t, int32_t,
1361  uint32_t, int64_t, uint64_t>() &&
1362  detail::is_type<T1, int16_t, uint16_t, int32_t,
1363  uint32_t, int64_t, uint64_t>() &&
1364  detail::is_type<T2, int16_t, uint16_t, int32_t,
1365  uint32_t, int64_t, uint64_t>(),
1366  std::remove_const_t<T0>>
1367  ror(T1 src0, T2 src1) {
1368  simd<T1, 1> Src0 = src0;
1369  simd<T0, 1> Result = esimd::ror<T0, T1, 1, T2>(Src0, src1);
1370  return Result[0];
1371 }
1372 
1374 
1377 
1390 template <typename T1, typename T2, typename T3, typename T4, int N,
1391  class Sat = saturation_off_tag>
1392 __ESIMD_API std::enable_if_t<
1393  detail::is_dword_type<T1>::value && detail::is_dword_type<T2>::value &&
1394  detail::is_dword_type<T3>::value && detail::is_dword_type<T4>::value,
1397 #if defined(__SYCL_DEVICE_ONLY__)
1398  simd<T1, N> Result;
1399  simd<T2, N> Src0 = src0;
1400  simd<T3, N> Src1 = src1;
1401  simd<T4, N> Src2 = src2;
1402  if constexpr (std::is_same_v<Sat, saturation_off_tag>) {
1403  if constexpr (std::is_unsigned<T1>::value) {
1404  if constexpr (std::is_unsigned<T2>::value) {
1405  Result = __esimd_uudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1406  Src2.data());
1407  } else {
1408  Result = __esimd_usdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1409  Src2.data());
1410  }
1411  } else {
1412  if constexpr (std::is_unsigned<T2>::value) {
1413  Result = __esimd_sudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1414  Src2.data());
1415  } else {
1416  Result = __esimd_ssdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1417  Src2.data());
1418  }
1419  }
1420  } else {
1421  if constexpr (std::is_unsigned<T1>::value) {
1422  if constexpr (std::is_unsigned<T2>::value) {
1423  Result = __esimd_uudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1424  Src2.data());
1425  } else {
1426  Result = __esimd_usdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1427  Src2.data());
1428  }
1429  } else {
1430  if constexpr (std::is_unsigned<T2>::value) {
1431  Result = __esimd_sudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1432  Src2.data());
1433  } else {
1434  Result = __esimd_ssdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
1435  Src2.data());
1436  }
1437  }
1438  }
1439  return Result;
1440 #else
1441  __ESIMD_UNSUPPORTED_ON_HOST;
1442 #endif // __SYCL_DEVICE_ONLY__
1443 }
1444 
1445 // reduction functions
1446 namespace detail {
1447 template <typename T0, typename T1, int SZ> struct esimd_apply_sum {
1448  template <typename... T>
1450  return v1 + v2;
1451  }
1452 };
1454 template <typename T0, typename T1, int SZ> struct esimd_apply_prod {
1455  template <typename... T>
1457  return v1 * v2;
1458  }
1459 };
1461 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
1462  template <typename... T>
1464  if constexpr (std::is_floating_point<T1>::value) {
1465  return __esimd_fmax<T1, SZ>(v1.data(), v2.data());
1466  } else if constexpr (std::is_unsigned<T1>::value) {
1467  return __esimd_umax<T1, SZ>(v1.data(), v2.data());
1468  } else {
1469  return __esimd_smax<T1, SZ>(v1.data(), v2.data());
1470  }
1471  }
1472 };
1474 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
1475  template <typename... T>
1477  if constexpr (std::is_floating_point<T1>::value) {
1478  return __esimd_fmin<T1, SZ>(v1.data(), v2.data());
1479  } else if constexpr (std::is_unsigned<T1>::value) {
1480  return __esimd_umin<T1, SZ>(v1.data(), v2.data());
1481  } else {
1482  return __esimd_smin<T1, SZ>(v1.data(), v2.data());
1483  }
1484  }
1485 };
1486 
1487 template <typename T0, typename T1, int SZ,
1488  template <typename RT, typename T, int N> class OpType>
1490  if constexpr (SZ == 1) {
1491  return v[0];
1492  } else {
1493  static_assert(detail::isPowerOf2(SZ),
1494  "Invaid input for reduce_single - the vector size must "
1495  "be power of two.");
1496  constexpr int N = SZ / 2;
1497  simd<T0, N> tmp = OpType<T0, T1, N>()(v.template select<N, 1>(0),
1498  v.template select<N, 1>(N));
1499  return reduce_single<T0, T0, N, OpType>(tmp);
1500  }
1501 }
1502 
1503 template <typename T0, typename T1, int N1, int N2,
1504  template <typename RT, typename T, int N> class OpType>
1506  if constexpr (N1 == N2) {
1507  simd<T0, N1> tmp = OpType<T0, T1, N1>()(v1, v2);
1508  return reduce_single<T0, T0, N1, OpType>(tmp);
1509  } else if constexpr (N1 < N2) {
1510  simd<T0, N1> tmp1 = OpType<T0, T1, N1>()(v1, v2.template select<N1, 1>(0));
1511  constexpr int N = N2 - N1;
1512  using NT = simd<T0, N>;
1513  NT tmp2 = convert<T0>(v2.template select<N, 1>(N1).read());
1514  return reduce_pair<T0, T0, N1, N, OpType>(tmp1, tmp2);
1515  } else {
1516  static_assert(detail::isPowerOf2(N1),
1517  "Invaid input for reduce_pair - N1 must be power of two.");
1518  constexpr int N = N1 / 2;
1519  simd<T0, N> tmp = OpType<T0, T1, N>()(v1.template select<N, 1>(0),
1520  v1.template select<N, 1>(N));
1521  using NT = simd<T0, N2>;
1522  NT tmp2 = convert<T0>(v2);
1523  return reduce_pair<T0, T0, N, N2, OpType>(tmp, tmp2);
1524  }
1525 }
1526 
1527 template <typename T0, typename T1, int SZ,
1528  template <typename RT, typename T, int N> class OpType>
1529 T0 reduce(simd<T1, SZ> v) {
1530  constexpr bool isPowerOf2 = detail::isPowerOf2(SZ);
1531  if constexpr (isPowerOf2) {
1532  return reduce_single<T0, T1, SZ, OpType>(v);
1533  } else {
1534  constexpr unsigned N1 = 1u << detail::log2<SZ>();
1535  constexpr unsigned N2 = SZ - N1;
1536 
1537  simd<T1, N1> v1 = v.template select<N1, 1>(0);
1538  simd<T1, N2> v2 = v.template select<N2, 1>(N1);
1539  return reduce_pair<T0, T1, N1, N2, OpType>(v1, v2);
1540  }
1541 };
1542 
1543 template <typename T0, typename T1, int SZ>
1544 ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd<T1, SZ> v) {
1545  using TT = detail::computation_type_t<simd<T1, SZ>>;
1546  using RT = typename TT::element_type;
1547  T0 retv = reduce<RT, T1, SZ, esimd_apply_sum>(v);
1548  return retv;
1549 }
1550 
1551 template <typename T0, typename T1, int SZ>
1552 ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd<T1, SZ> v) {
1553  using TT = detail::computation_type_t<simd<T1, SZ>>;
1554  using RT = typename TT::element_type;
1555  T0 retv = reduce<RT, T1, SZ, esimd_apply_prod>(v);
1556  return retv;
1557 }
1558 } // namespace detail
1560 
1568 template <typename T0, typename T1, int SZ>
1569 ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd<T1, SZ> v) {
1570  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_max>(v);
1571  return retv;
1572 }
1573 
1581 template <typename T0, typename T1, int SZ>
1582 ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd<T1, SZ> v) {
1583  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_min>(v);
1584  return retv;
1585 }
1586 
1600 // TODO 1) enforce BinaryOperation constraints 2) support std::minimum/maximum
1601 template <typename T0, typename T1, int SZ, typename BinaryOperation>
1602 ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd<T1, SZ> v, BinaryOperation op) {
1603  if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1604  std::plus<>>::value) {
1605  T0 retv = detail::sum<T0>(v);
1606  return retv;
1607  } else if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1608  std::multiplies<>>::value) {
1609  T0 retv = detail::prod<T0>(v);
1610  return retv;
1611  }
1612 }
1613 
1616 
1621 enum class bfn_t : uint8_t { x = 0xAA, y = 0xCC, z = 0xF0 };
1623 static constexpr bfn_t operator~(bfn_t x) {
1624  uint8_t val = static_cast<uint8_t>(x);
1625  uint8_t res = ~val;
1626  return static_cast<bfn_t>(res);
1627 }
1629 static constexpr bfn_t operator|(bfn_t x, bfn_t y) {
1630  uint8_t arg0 = static_cast<uint8_t>(x);
1631  uint8_t arg1 = static_cast<uint8_t>(y);
1632  uint8_t res = arg0 | arg1;
1633  return static_cast<bfn_t>(res);
1634 }
1636 static constexpr bfn_t operator&(bfn_t x, bfn_t y) {
1637  uint8_t arg0 = static_cast<uint8_t>(x);
1638  uint8_t arg1 = static_cast<uint8_t>(y);
1639  uint8_t res = arg0 & arg1;
1640  return static_cast<bfn_t>(res);
1641 }
1643 static constexpr bfn_t operator^(bfn_t x, bfn_t y) {
1644  uint8_t arg0 = static_cast<uint8_t>(x);
1645  uint8_t arg1 = static_cast<uint8_t>(y);
1646  uint8_t res = arg0 ^ arg1;
1647  return static_cast<bfn_t>(res);
1648 }
1649 
1658 template <bfn_t FuncControl, typename T, int N>
1659 __ESIMD_API std::enable_if_t<std::is_integral_v<T>, __ESIMD_NS::simd<T, N>>
1660 bfn(__ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
1661  __ESIMD_NS::simd<T, N> src2) {
1662  if constexpr ((sizeof(T) == 8) || ((sizeof(T) == 1) && (N % 4 == 0)) ||
1663  ((sizeof(T) == 2) && (N % 2 == 0))) {
1664  // Bitcast Nx8-byte vectors to 2xN vectors of 4-byte integers.
1665  // Bitcast Nx1-byte vectors to N/4 vectors of 4-byte integers.
1666  // Bitcast Nx2-byte vectors to N/2 vectors of 4-byte integers.
1667  auto Result = __ESIMD_NS::bfn<FuncControl>(
1668  src0.template bit_cast_view<int32_t>().read(),
1669  src1.template bit_cast_view<int32_t>().read(),
1670  src2.template bit_cast_view<int32_t>().read());
1671  return Result.template bit_cast_view<T>();
1672  } else if constexpr (sizeof(T) == 2 || sizeof(T) == 4) {
1673  constexpr uint8_t FC = static_cast<uint8_t>(FuncControl);
1674  return __esimd_bfn<FC, T, N>(src0.data(), src1.data(), src2.data());
1675  } else if constexpr (N % 2 == 0) {
1676  // Bitcast Nx1-byte vectors (N is even) to N/2 vectors of 2-byte integers.
1677  auto Result = __ESIMD_NS::bfn<FuncControl>(
1678  src0.template bit_cast_view<int16_t>().read(),
1679  src1.template bit_cast_view<int16_t>().read(),
1680  src2.template bit_cast_view<int16_t>().read());
1681  return Result.template bit_cast_view<T>();
1682  } else {
1683  // Odd number of 1-byte elements.
1684  __ESIMD_NS::simd<T, N + 1> Src0, Src1, Src2;
1685  Src0.template select<N, 1>() = src0;
1686  Src1.template select<N, 1>() = src1;
1687  Src2.template select<N, 1>() = src2;
1688  auto Result = __ESIMD_NS::bfn<FuncControl>(Src0, Src1, Src2);
1689  return Result.template select<N, 1>();
1690  }
1691 }
1692 
1700 template <bfn_t FuncControl, typename T>
1701 ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
1702  __ESIMD_DNS::is_esimd_scalar<T>::value && std::is_integral_v<T>, T>
1703 bfn(T src0, T src1, T src2) {
1704  __ESIMD_NS::simd<T, 1> Src0 = src0;
1705  __ESIMD_NS::simd<T, 1> Src1 = src1;
1706  __ESIMD_NS::simd<T, 1> Src2 = src2;
1707  __ESIMD_NS::simd<T, 1> Result =
1708  esimd::bfn<FuncControl, T, 1>(Src0, Src1, Src2);
1709  return Result[0];
1710 }
1711 
1713 
1720 template <int N>
1721 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1722 addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
1723  __ESIMD_NS::simd<uint32_t, N> src1) {
1724  std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
1725  __ESIMD_DNS::vector_type_t<uint32_t, N>>
1726  Result = __esimd_addc<uint32_t, N>(src0.data(), src1.data());
1727 
1728  carry = Result.first;
1729  return Result.second;
1730 }
1731 
1738 template <int N>
1739 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1740 addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
1741  uint32_t src1) {
1742  __ESIMD_NS::simd<uint32_t, N> Src1V = src1;
1743  return addc(carry, src0, Src1V);
1744 }
1745 
1752 template <int N>
1753 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1754 addc(__ESIMD_NS::simd<uint32_t, N> &carry, uint32_t src0,
1755  __ESIMD_NS::simd<uint32_t, N> src1) {
1756  __ESIMD_NS::simd<uint32_t, N> Src0V = src0;
1757  return addc(carry, Src0V, src1);
1758 }
1759 
1766 __ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1) {
1767  __ESIMD_NS::simd<uint32_t, 1> CarryV = carry;
1768  __ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
1769  __ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
1770  __ESIMD_NS::simd<uint32_t, 1> Res = addc(CarryV, Src0V, Src1V);
1771  carry = CarryV[0];
1772  return Res[0];
1773 }
1774 
1782 template <int N>
1783 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1784 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
1785  __ESIMD_NS::simd<uint32_t, N> src1) {
1786  std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
1787  __ESIMD_DNS::vector_type_t<uint32_t, N>>
1788  Result = __esimd_subb<uint32_t, N>(src0.data(), src1.data());
1789 
1790  borrow = Result.first;
1791  return Result.second;
1792 }
1793 
1801 template <int N>
1802 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1803 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
1804  uint32_t src1) {
1805  __ESIMD_NS::simd<uint32_t, N> Src1V = src1;
1806  return subb(borrow, src0, Src1V);
1807 }
1808 
1816 template <int N>
1817 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1818 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, uint32_t src0,
1819  __ESIMD_NS::simd<uint32_t, N> src1) {
1820  __ESIMD_NS::simd<uint32_t, N> Src0V = src0;
1821  return subb(borrow, Src0V, src1);
1822 }
1823 
1831 __ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1) {
1832  __ESIMD_NS::simd<uint32_t, 1> BorrowV = borrow;
1833  __ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
1834  __ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
1835  __ESIMD_NS::simd<uint32_t, 1> Res = subb(BorrowV, Src0V, Src1V);
1836  borrow = BorrowV[0];
1837  return Res[0];
1838 }
1839 
1842 __ESIMD_API uint64_t rdtsc() {
1843  __ESIMD_NS::simd<uint32_t, 4> retv = __esimd_timestamp();
1844  return retv.template bit_cast_view<uint64_t>()[0];
1845 }
1846 
1848 
1849 } // namespace ext::intel::esimd
1850 } // namespace _V1
1851 } // namespace sycl
const auto & data() const noexcept
Definition: simd.hpp:1673
Definition: simd.hpp:1387
This class represents a reference to a sub-region of a base simd object.
Definition: simd_view.hpp:37
typename ShapeTy::element_type element_type
The element type of this class, which could be different from the element type of the base object typ...
Definition: simd_view.hpp:64
The main simd vector class.
Definition: simd.hpp:53
__ESIMD_API std::enable_if_t< detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), simd< T0, SZ > > ror(simd< T1, SZ > src0, simd< T1, SZ > src1)
Rotate right operation with two vector inputs.
Definition: math.hpp:1321
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > asr(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Arithmetical Shift Right (vector version)
Definition: math.hpp:1119
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > lsr(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Logical Shift Right (vector version)
Definition: math.hpp:1046
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), uint > pack_mask(simd_mask< N > src0)
Pack a simd_mask into a single unsigned 32-bit integer value.
Definition: math.hpp:740
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > shr(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Shift right operation (vector version)
Definition: math.hpp:1190
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&(sizeof(T)==4), simd< T, N > > fbl(simd< T, N > src)
Find the per element number of the first bit set in the source operand starting from the least signif...
Definition: math.hpp:841
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value, simd< T0, SZ > > shl(simd< T1, SZ > src0, simd< T1, SZ > src1, Sat sat={})
Shift left operation (vector version)
Definition: math.hpp:946
__ESIMD_API std::enable_if_t<(std::is_same_v< T, ushort >||std::is_same_v< T, uint >) &&(N > 0 &&N<=32), uint > ballot(simd< T, N > mask)
Compare source vector elements against zero and return a bitfield combining the comparison result.
Definition: math.hpp:779
__ESIMD_API std::enable_if_t< detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), simd< T0, SZ > > rol(simd< T1, SZ > src0, simd< T1, SZ > src1)
Rotate left operation with two vector inputs.
Definition: math.hpp:1256
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< std::is_integral< T >::value &&(sizeof(T)<=4), simd< uint32_t, N > > cbit(simd< T, N > src)
Count number of bits set in the source operand per element.
Definition: math.hpp:798
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_signed< T >::value &&(sizeof(T)==4), simd< T, N > > fbh(simd< T, N > src)
Find the per element number of the first bit set in the source operand starting from the most signifi...
Definition: math.hpp:885
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), simd_mask< N > > unpack_mask(uint src0)
Unpack an unsigned 32-bit integer value into a simd_mask.
Definition: math.hpp:754
#define __ESIMD_INTRINSIC_DEF(name)
Definition: math.hpp:604
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndu(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-up (also known as ceil).
Definition: math.hpp:648
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndd(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-down (also known as floor).
Definition: math.hpp:639
__ESIMD_API sycl::ext::intel::esimd::simd< RT, SZ > trunc(const sycl::ext::intel::esimd::simd< float, SZ > &src0, Sat sat={})
Round to integral value using the round to zero rounding mode (vector version).
Definition: math.hpp:708
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > floor(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Floor" operation, vector version - alias of rndd.
Definition: math.hpp:675
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rnde(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-to-even (also known as round).
Definition: math.hpp:656
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > ceil(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Ceiling" operation, vector version - alias of rndu.
Definition: math.hpp:688
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndz(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-to-zero (also known as trunc).
Definition: math.hpp:664
__ESIMD_API std::enable_if_t< std::is_integral_v< T >, sycl::ext::intel::esimd::simd< T, N > > bfn(sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd< T, N > src2)
Performs binary function computation with three vector operands.
Definition: math.hpp:1659
static constexpr bfn_t operator&(bfn_t x, bfn_t y)
Definition: math.hpp:1635
bfn_t
This enum is used to encode all possible logical operations performed on the 3 input operands.
Definition: math.hpp:1620
static constexpr bfn_t operator|(bfn_t x, bfn_t y)
Definition: math.hpp:1628
static constexpr bfn_t operator~(bfn_t x)
Definition: math.hpp:1622
static constexpr bfn_t operator^(bfn_t x, bfn_t y)
Definition: math.hpp:1642
__ESIMD_API simd< T, N > cos(simd< T, N > src, Sat sat={})
Cosine.
Definition: math.hpp:436
__ESIMD_API simd< T, N > sqrt(simd< T, N > src, Sat sat={})
Square root.
Definition: math.hpp:402
__ESIMD_API simd< T, N > rsqrt(simd< T, N > src, Sat sat={})
Square root reciprocal - calculates 1/sqrt(x).
Definition: math.hpp:428
#define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:365
__ESIMD_API simd< T, N > pow(simd< T, N > src0, simd< U, N > src1, Sat sat={})
Power - calculates src0 in power of src1.
Definition: math.hpp:498
#define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:464
__ESIMD_API simd< T, N > inv(simd< T, N > src, Sat sat={})
Inversion - calculates (1/x).
Definition: math.hpp:388
__ESIMD_API simd< T, N > sqrt_ieee(simd< T, N > src, Sat sat={})
IEEE754-compliant square root. Supports float and double.
Definition: math.hpp:407
#define __ESIMD_EMATH_SPIRV_COND
Definition: math.hpp:383
__ESIMD_API simd< T, N > div_ieee(simd< T, N > src0, simd< U, N > src1, Sat sat={})
IEEE754-compliant floating-point division. Supports float and double.
Definition: math.hpp:539
__ESIMD_API simd< T, N > sin(simd< T, N > src, Sat sat={})
Sine.
Definition: math.hpp:432
#define __ESIMD_EMATH_IEEE_COND
Definition: math.hpp:380
__ESIMD_API simd< T, N > log2(simd< T, N > src, Sat sat={})
Logarithm base 2.
Definition: math.hpp:394
__ESIMD_API simd< T, N > exp2(simd< T, N > src, Sat sat={})
Exponent base 2.
Definition: math.hpp:398
ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd< T1, SZ > v)
ESIMD_DETAIL.
Definition: math.hpp:1568
__ESIMD_API SZ simd< T, SZ > src1
Definition: math.hpp:180
__ESIMD_API simd< T, SZ >(max)(simd< T
Selects component-wise the maximum of the two vectors.
ESIMD_DETAIL ESIMD_NODEBUG ESIMD_INLINE simd< T, SZ > log(simd< T, SZ > src0, Sat sat={})
Computes the natural logarithm of the given argument.
Definition: math.hpp:564
__ESIMD_API SZ simd< T, SZ > Sat sat
Definition: math.hpp:180
ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd< T1, SZ > v, BinaryOperation op)
Performs reduction over elements of the input vector.
Definition: math.hpp:1601
__ESIMD_API SZ simd< T, SZ > Sat int class simd< T, SZ >() min(simd< T, SZ > src0, T src1, Sat sat={})
Definition: math.hpp:298
__ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v< T0 >||std::is_same_v< T1, T0 >, simd< T0, SZ > > saturate(simd< T1, SZ > src)
Conversion of input vector elements of type T1 into vector of elements of type T0 with saturation.
Definition: math.hpp:72
__ESIMD_API SZ simd< T, SZ > Sat int SZ
Definition: math.hpp:211
__ESIMD_API sycl::ext::intel::esimd::simd< uint32_t, N > addc(sycl::ext::intel::esimd::simd< uint32_t, N > &carry, sycl::ext::intel::esimd::simd< uint32_t, N > src0, sycl::ext::intel::esimd::simd< uint32_t, N > src1)
Performs add with carry of 2 unsigned 32-bit vectors.
Definition: math.hpp:1721
__ESIMD_API SZ simd< T, SZ > Sat int class simd< T, SZ >() max(simd< T, SZ > src0, T src1, Sat sat={})
Definition: math.hpp:213
__ESIMD_API uint64_t rdtsc()
rdtsc - get the value of timestamp counter.
Definition: math.hpp:1841
ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd< T1, SZ > v)
Performs 'minimum' operation reduction over elements of the input vector, that is,...
Definition: math.hpp:1581
ESIMD_NODEBUG ESIMD_INLINE simd< T, SZ > exp(simd< T, SZ > src0, Sat sat={})
Computes e raised to the power of the given argument.
Definition: math.hpp:585
__ESIMD_API SZ simd< T, SZ > Sat int class Sat
Definition: math.hpp:211
__ESIMD_API sycl::ext::intel::esimd::simd< uint32_t, N > subb(sycl::ext::intel::esimd::simd< uint32_t, N > &borrow, sycl::ext::intel::esimd::simd< uint32_t, N > src0, sycl::ext::intel::esimd::simd< uint32_t, N > src1)
Performs substraction with borrow of 2 unsigned 32-bit vectors.
Definition: math.hpp:1783
__ESIMD_API std::enable_if_t< detail::is_dword_type< T1 >::value &&detail::is_dword_type< T2 >::value &&detail::is_dword_type< T3 >::value &&detail::is_dword_type< T4 >::value, simd< T1, N > > dp4a(simd< T2, N > src0, simd< T3, N > src1, simd< T4, N > src2, Sat sat={})
DP4A.
Definition: math.hpp:1395
__ESIMD_API SZ src0
Definition: math.hpp:180
ESIMD_DETAIL __ESIMD_API std::enable_if_t< !std::is_same< std::remove_const_t< TRes >, std::remove_const_t< TArg > >::value, simd< TRes, SZ > > abs(simd< TArg, SZ > src0)
Get absolute value (vector version)
Definition: math.hpp:127
conditional< sizeof(long)==8, long, long long >::type int64_t
Definition: kernel_desc.hpp:35
T0 reduce(simd< T1, SZ > v)
Definition: math.hpp:1528
ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd< T1, SZ > v)
Definition: math.hpp:1551
T0 reduce_single(simd< T1, SZ > v)
Definition: math.hpp:1488
ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd< T1, SZ > v)
Definition: math.hpp:1543
T0 reduce_pair(simd< T1, N1 > v1, simd< T1, N2 > v2)
Definition: math.hpp:1504
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
Definition: common.hpp:96
autodecltype(x) x
Definition: access.hpp:18
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1455
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1462
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1475
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
Definition: math.hpp:1448
This type tag represents "saturation off" behavior.
Definition: common.hpp:50