DPC++ Runtime
Runtime libraries for oneAPI DPC++
math.hpp
Go to the documentation of this file.
1 //==-------------- math.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement Explicit SIMD math APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
21 
22 #include <cstdint>
23 
24 namespace sycl {
26 namespace ext::intel::esimd {
27 
40 
43 
// Converts the elements of the input vector from type T1 to type T0 with
// saturation. The intrinsic is picked at compile time from the kinds of T0 and
// T1. Participates in overload resolution only when T0 is not a generic
// floating-point type or when T1 is the same type as T0.
// NOTE(review): listing lines 71-72 (the tail of the return type and the
// declarator `saturate(simd<T1, SZ> src) {`) are absent from this extracted
// listing; confirm against the original header.
68 template <typename T0, typename T1, int SZ>
69 __ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v<T0> ||
70  std::is_same_v<T1, T0>,
// Destination is a generic floating-point type: generic saturation intrinsic.
73  if constexpr (detail::is_generic_floating_point_v<T0>)
74  return __esimd_sat<T0, T1, SZ>(src.data());
// Source is floating point, destination is not: pick the fp-to-unsigned or
// fp-to-signed intrinsic based on the signedness of T0.
75  else if constexpr (detail::is_generic_floating_point_v<T1>) {
76  if constexpr (std::is_unsigned<T0>::value)
77  return __esimd_fptoui_sat<T0, T1, SZ>(src.data());
78  else
79  return __esimd_fptosi_sat<T0, T1, SZ>(src.data());
// Neither type is floating point and the destination is unsigned: the
// intrinsic depends on whether the source is unsigned as well.
80  } else if constexpr (std::is_unsigned<T0>::value) {
81  if constexpr (std::is_unsigned<T1>::value)
82  return __esimd_uutrunc_sat<T0, T1, SZ>(src.data());
83  else
84  return __esimd_ustrunc_sat<T0, T1, SZ>(src.data());
// Remaining case (destination not unsigned): the intrinsic depends on whether
// the source is signed.
85  } else {
86  if constexpr (std::is_signed<T1>::value)
87  return __esimd_sstrunc_sat<T0, T1, SZ>(src.data());
88  else
89  return __esimd_sutrunc_sat<T0, T1, SZ>(src.data());
90  }
91 }
92 
94 // abs
95 namespace detail {
96 
97 template <typename TRes, typename TArg, int SZ>
98 ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
99 __esimd_abs_common_internal(simd<TArg, SZ> src0) {
100  simd<TArg, SZ> Result = simd<TArg, SZ>(__esimd_abs<TArg, SZ>(src0.data()));
101  return convert<TRes>(Result);
102 }
103 
104 template <typename TRes, typename TArg>
105 ESIMD_NODEBUG
106  ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<TRes>::value &&
107  detail::is_esimd_scalar<TArg>::value,
108  TRes>
109  __esimd_abs_common_internal(TArg src0) {
110  simd<TArg, 1> Src0 = src0;
111  simd<TArg, 1> Result = __esimd_abs_common_internal<TArg>(Src0);
112  return convert<TRes>(Result)[0];
113 }
114 } // namespace detail
116 
// Absolute value of a vector with an explicitly requested result element type
// TRes. Enabled only when TRes and TArg differ after const is removed; the
// work is forwarded to detail::__esimd_abs_common_internal.
// NOTE(review): listing lines 126-127 (the tail of the return type and the
// declarator that introduces `src0`) are absent from this extracted listing;
// confirm against the original header.
123 template <typename TRes, typename TArg, int SZ>
124 __ESIMD_API std::enable_if_t<
125  !std::is_same<std::remove_const_t<TRes>, std::remove_const_t<TArg>>::value,
128  return detail::__esimd_abs_common_internal<TRes, TArg, SZ>(src0.data());
129 }
130 
136 template <typename TRes, typename TArg>
137 __ESIMD_API std::enable_if_t<!std::is_same<std::remove_const_t<TRes>,
138  std::remove_const_t<TArg>>::value &&
139  detail::is_esimd_scalar<TRes>::value &&
140  detail::is_esimd_scalar<TArg>::value,
141  std::remove_const_t<TRes>>
142 abs(TArg src0) {
143  return detail::__esimd_abs_common_internal<TRes, TArg>(src0);
144 }
145 
153 template <typename T1, int SZ> __ESIMD_API simd<T1, SZ> abs(simd<T1, SZ> src0) {
154  return detail::__esimd_abs_common_internal<T1, T1, SZ>(src0.data());
155 }
156 
163 template <typename T1>
164 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T1>::value,
165  std::remove_const_t<T1>>
166 abs(T1 src0) {
167  return detail::__esimd_abs_common_internal<T1, T1>(src0);
168 }
169 
179 template <typename T, int SZ, class Sat = saturation_off_tag>
180 __ESIMD_API simd<T, SZ> max(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
181  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
182 
183  if constexpr (std::is_floating_point<T>::value) {
184  auto Result = __esimd_fmax<T, SZ>(src0.data(), src1.data());
185  if constexpr (is_sat)
186  Result = __esimd_sat<T, T, SZ>(Result);
187  return simd<T, SZ>(Result);
188  } else if constexpr (std::is_unsigned<T>::value) {
189  auto Result = __esimd_umax<T, SZ>(src0.data(), src1.data());
190  if constexpr (is_sat)
191  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
192  return simd<T, SZ>(Result);
193  } else {
194  auto Result = __esimd_smax<T, SZ>(src0.data(), src1.data());
195  if constexpr (is_sat)
196  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
197  return simd<T, SZ>(Result);
198  }
199 }
200 
211 template <typename T, int SZ, class Sat = saturation_off_tag>
212 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>
213 max(simd<T, SZ> src0, T src1, Sat sat = {}) {
214  simd<T, SZ> Src1 = src1;
215  simd<T, SZ> Result = esimd::max<T>(src0, Src1, sat);
216  return Result;
217 }
218 
229 template <typename T, int SZ, class Sat = saturation_off_tag>
230 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>
231 max(T src0, simd<T, SZ> src1, Sat sat = {}) {
232  simd<T, SZ> Src0 = src0;
233  simd<T, SZ> Result = esimd::max<T>(Src0, src1, sat);
234  return Result;
235 }
236 
245 template <typename T, class Sat = saturation_off_tag>
246 ESIMD_NODEBUG
247  ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<T>::value, T>
248  max(T src0, T src1, Sat sat = {}) {
249  simd<T, 1> Src0 = src0;
250  simd<T, 1> Src1 = src1;
251  simd<T, 1> Result = esimd::max<T>(Src0, Src1, sat);
252  return Result[0];
253 }
254 
264 template <typename T, int SZ, class Sat = saturation_off_tag>
265 __ESIMD_API simd<T, SZ> min(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
266  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
267 
268  if constexpr (std::is_floating_point<T>::value) {
269  auto Result = __esimd_fmin<T, SZ>(src0.data(), src1.data());
270  if constexpr (is_sat)
271  Result = __esimd_sat<T, T, SZ>(Result);
272  return simd<T, SZ>(Result);
273  } else if constexpr (std::is_unsigned<T>::value) {
274  auto Result = __esimd_umin<T, SZ>(src0.data(), src1.data());
275  if constexpr (is_sat)
276  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
277  return simd<T, SZ>(Result);
278  } else {
279  auto Result = __esimd_smin<T, SZ>(src0.data(), src1.data());
280  if constexpr (is_sat)
281  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
282  return simd<T, SZ>(Result);
283  }
284 }
285 
296 template <typename T, int SZ, class Sat = saturation_off_tag>
297 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>
298 min(simd<T, SZ> src0, T src1, Sat sat = {}) {
299  simd<T, SZ> Src1 = src1;
300  simd<T, SZ> Result = esimd::min<T>(src0, Src1, sat);
301  return Result;
302 }
303 
314 template <typename T, int SZ, class Sat = saturation_off_tag>
315 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>
316 min(T src0, simd<T, SZ> src1, Sat sat = {}) {
317  simd<T, SZ> Src0 = src0;
318  simd<T, SZ> Result = esimd::min<T>(Src0, src1, sat);
319  return Result;
320 }
321 
330 template <typename T, class Sat = saturation_off_tag>
331 ESIMD_NODEBUG
332  ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<T>::value, T>
333  min(T src0, T src1, Sat sat = {}) {
334  simd<T, 1> Src0 = src0;
335  simd<T, 1> Src1 = src1;
336  simd<T, 1> Result = esimd::min<T>(Src0, Src1, sat);
337  return Result[0];
338 }
339 
341 
344 
// Defines two overloads of `name`, both gated by `COND` through a defaulted
// std::enable_if_t template parameter:
//  * a vector overload that calls the `__esimd_<iname>` intrinsic on the raw
//    data of `src`; the raw result is returned as-is when Sat is
//    saturation_off_tag and is passed through esimd::saturate<T> otherwise;
//  * a scalar overload that wraps `src` into simd<T, 1>, calls the vector
//    overload and returns element 0.
// Keep comments outside the macro body: a `//` comment on a line that ends
// with a backslash would swallow the following continuation line.
345 #define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname) \
346  \
347  template <class T, int N, class Sat = saturation_off_tag, \
348  class = std::enable_if_t<COND>> \
349  __ESIMD_API simd<T, N> name(simd<T, N> src, Sat sat = {}) { \
350  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res = \
351  __esimd_##iname<T, N>(src.data()); \
352  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
353  return res; \
354  else \
355  return esimd::saturate<T>(res); \
356  } \
357  \
358  \
359  template <typename T, class Sat = saturation_off_tag, \
360  class = std::enable_if_t<COND>> \
361  __ESIMD_API T name(T src, Sat sat = {}) { \
362  simd<T, 1> src_vec = src; \
363  simd<T, 1> res = name<T, 1>(src_vec, sat); \
364  return res[0]; \
365  }
366 
// Type constraint: T is a generic floating-point type of at most 4 bytes.
// Expands to an expression over a template parameter named T, so it is only
// usable where such a parameter is in scope (presumably as the COND argument
// of the intrinsic-definition macros — confirm at the use sites).
367 #define __ESIMD_EMATH_COND \
368  detail::is_generic_floating_point_v<T> && (sizeof(T) <= 4)
369 
// Type constraint: T is a generic floating-point type of at least 4 bytes.
370 #define __ESIMD_EMATH_IEEE_COND \
371  detail::is_generic_floating_point_v<T> && (sizeof(T) >= 4)
372 
376 
382 
386 
390 
393 
398 
402 
406 
407 #undef __ESIMD_UNARY_INTRINSIC_DEF
408 
// Defines three overloads of `name`, each gated by `COND` through a defaulted
// std::enable_if_t template parameter:
//  * (vector, vector): converts the raw data of `src1` from element type U to
//    T, calls the `__esimd_<iname>` intrinsic, and returns the raw result
//    as-is when Sat is saturation_off_tag or passes it through
//    esimd::saturate<T> otherwise;
//  * (vector, scalar): turns the scalar into simd<U, N> and forwards to the
//    (vector, vector) overload;
//  * (scalar, scalar): wraps both values into one-element vectors, forwards
//    to the (vector, vector) overload and returns element 0.
// Keep comments outside the macro body: a `//` comment on a line that ends
// with a backslash would swallow the following continuation line.
409 #define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname) \
410  \
411  template <class T, int N, class U, class Sat = saturation_off_tag, \
412  class = std::enable_if_t<COND>> \
413  __ESIMD_API simd<T, N> name(simd<T, N> src0, simd<U, N> src1, \
414  Sat sat = {}) { \
415  using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>; \
416  RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data()); \
417  RawVecT res_raw = __esimd_##iname<T, N>(src0.data(), src1_raw_conv); \
418  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
419  return res_raw; \
420  else \
421  return esimd::saturate<T>(simd<T, N>(res_raw)); \
422  } \
423  \
424  \
425  template <class T, int N, class U, class Sat = saturation_off_tag, \
426  class = std::enable_if_t<COND>> \
427  __ESIMD_API simd<T, N> name(simd<T, N> src0, U src1, Sat sat = {}) { \
428  return name<T, N, U>(src0, simd<U, N>(src1), sat); \
429  } \
430  \
431  \
432  template <class T, class U, class Sat = saturation_off_tag, \
433  class = std::enable_if_t<COND>> \
434  __ESIMD_API T name(T src0, U src1, Sat sat = {}) { \
435  simd<T, 1> res = name<T, 1, U>(simd<T, 1>(src0), simd<U, 1>(src1), sat); \
436  return res[0]; \
437  }
438 
443 
446 
447 #undef __ESIMD_BINARY_INTRINSIC_DEF
448 #undef __ESIMD_EMATH_COND
449 #undef __ESIMD_EMATH_IEEE_COND
450 
452 
455 
namespace detail {
// Both constants are `inline` so that every translation unit including this
// header refers to the same entity; a plain namespace-scope `constexpr`
// variable has internal linkage, which makes its use from inline functions
// fragile with respect to the one-definition rule.

/// Natural logarithm of 2 (std::numbers::ln2_v<float> in C++20).
inline constexpr float ln2 = 0.69314718f;
/// Base-2 logarithm of e (std::numbers::log2e_v<float> in C++20).
inline constexpr float log2e = 1.442695f;
} // namespace detail
464 
469 template <class T, int SZ, class Sat = saturation_off_tag>
470 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> log(simd<T, SZ> src0, Sat sat = {}) {
471  using CppT = __ESIMD_DNS::__cpp_t<T>;
472  simd<T, SZ> Result =
473  esimd::log2<T, SZ, saturation_off_tag>(src0) * detail::ln2;
474 
475  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
476  return Result;
477  else
478  return esimd::saturate<T>(Result);
479 }
480 
481 template <class T, class Sat = saturation_off_tag>
482 ESIMD_NODEBUG ESIMD_INLINE T log(T src0, Sat sat = {}) {
483  return esimd::log<T, 1>(src0, sat)[0];
484 }
485 
490 template <class T, int SZ, class Sat = saturation_off_tag>
491 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> exp(simd<T, SZ> src0, Sat sat = {}) {
492  using CppT = __ESIMD_DNS::__cpp_t<T>;
493  return esimd::exp2<T, SZ>(src0 * detail::log2e, sat);
494 }
495 
496 template <class T, class Sat = saturation_off_tag>
497 ESIMD_NODEBUG ESIMD_INLINE T exp(T src0, Sat sat = {}) {
498  return esimd::exp<T, 1>(src0, sat)[0];
499 }
500 
502 
505 
507 // Rounding intrinsics.
509 
// Defines the vector and scalar overloads of the rounding function `name`.
//  * Vector overload: calls the `__esimd_<name>` intrinsic on the raw data of
//    a float vector. With saturation_off_tag the float result is returned
//    directly (converted to simd<T, SZ> by the return statement). Otherwise,
//    when T is not float, the result is saturated as float and its raw data
//    is converted to T with convert_vector; when T is float it is passed
//    through saturate<T>.
//  * Scalar overload: wraps the float argument into a one-element vector,
//    calls the vector overload and returns element 0.
// Keep comments outside the macro body: a `//` comment on a line that ends
// with a backslash would swallow the following continuation line.
510 #define __ESIMD_INTRINSIC_DEF(name) \
511  \
512  \
513  \
515  \
516  \
517  \
518  template <typename T, int SZ, class Sat = __ESIMD_NS::saturation_off_tag> \
519  __ESIMD_API __ESIMD_NS::simd<T, SZ> name(__ESIMD_NS::simd<float, SZ> src0, \
520  Sat sat = {}) { \
521  __ESIMD_NS::simd<float, SZ> Result = __esimd_##name<SZ>(src0.data()); \
522  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>) \
523  return Result; \
524  else if constexpr (!std::is_same_v<float, T>) { \
525  auto RawRes = __ESIMD_NS::saturate<float>(Result).data(); \
526  return __ESIMD_DNS::convert_vector<T, float, SZ>(std::move(RawRes)); \
527  } else { \
528  return __ESIMD_NS::saturate<T>(Result); \
529  } \
530  } \
531  \
532  template <typename T, class Sat = __ESIMD_NS::saturation_off_tag> \
533  __ESIMD_API T name(float src0, Sat sat = {}) { \
534  __ESIMD_NS::simd<float, 1> Src0 = src0; \
535  __ESIMD_NS::simd<T, 1> Result = name<T>(Src0, sat); \
536  return Result[0]; \
537  }
538 
547 
556 
564 
572 
573 #undef __ESIMD_INTRINSIC_DEF
575 
578 
580 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
581 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
582 floor(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
583  return esimd::rndd<RT, SZ>(src0, sat);
584 }
585 
587 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
588 ESIMD_INLINE RT floor(float src0, Sat sat = {}) {
589  return esimd::rndd<RT, 1U>(src0, sat)[0];
590 }
591 
593 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
594 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
595 ceil(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
596  return esimd::rndu<RT, SZ>(src0, sat);
597 }
598 
600 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
601 ESIMD_INLINE RT ceil(float src0, Sat sat = {}) {
602  return esimd::rndu<RT, 1U>(src0, sat);
603 }
604 
613 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
614 __ESIMD_API __ESIMD_NS::simd<RT, SZ>
615 trunc(const __ESIMD_NS::simd<float, SZ> &src0, Sat sat = {}) {
616  return esimd::rndz<RT, SZ>(src0, sat);
617 }
618 
626 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
627 __ESIMD_API RT trunc(float src0, Sat sat = {}) {
628  return esimd::rndz<RT, 1U>(src0, sat)[0];
629 }
630 
632 
635 
644 template <int N>
645 ESIMD_NODEBUG
646  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), uint>
647  pack_mask(simd_mask<N> src0) {
648  return __esimd_pack_mask<N>(src0.data());
649 }
650 
658 template <int N>
659 ESIMD_NODEBUG
660  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), simd_mask<N>>
661  unpack_mask(uint src0) {
662  return __esimd_unpack_mask<N>(src0);
663 }
664 
667 template <int N>
668 __ESIMD_API std::enable_if_t<(N != 8 && N != 16 && N < 32), uint>
669 pack_mask(simd_mask<N> src0) {
670  simd_mask<(N < 8 ? 8 : N < 16 ? 16 : 32)> src_0 = 0;
671  src_0.template select<N, 1>() = src0.template bit_cast_view<ushort>();
672  return esimd::pack_mask(src_0);
673 }
674 
// Compare source vector elements against zero and return a bitfield combining
// the comparison result. Enabled for ushort/uint element types and
// 0 < N <= 32.
// NOTE(review): listing line 684 (the tail of the return type, `uint>`
// according to the cross-reference index of this page) is absent from this
// extracted listing.
681 template <typename T, int N>
682 __ESIMD_API std::enable_if_t<(std::is_same_v<T, ushort> ||
683  std::is_same_v<T, uint>)&&(N > 0 && N <= 32),
685 ballot(simd<T, N> mask) {
686  simd_mask<N> cmp = (mask != 0);
// Sizes handled by the pack intrinsic directly.
687  if constexpr (N == 8 || N == 16 || N == 32) {
688  return __esimd_pack_mask<N>(cmp.data());
689  } else {
// Otherwise copy the comparison result into the first N elements of a
// zero-initialized vector of the next supported size and pack that.
690  constexpr int N1 = (N <= 8 ? 8 : N <= 16 ? 16 : 32);
691  simd<uint16_t, N1> res = 0;
692  res.template select<N, 1>() = cmp.data();
693  return __esimd_pack_mask<N1>(res.data());
694  }
695 }
696 
701 template <typename T, int N>
702 ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
703  std::is_integral<T>::value && (sizeof(T) <= 4), simd<uint32_t, N>>
704 cbit(simd<T, N> src) {
705  return __esimd_cbit<T, N>(src.data());
706 }
707 
710 template <typename T>
711 __ESIMD_API
712  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) <= 4), uint32_t>
713  cbit(T src) {
714  simd<T, 1> Src = src;
715  simd<uint32_t, 1> Result = esimd::cbit(Src);
716  return Result[0];
717 }
718 
// Scalar version of cbit, that takes simd_view object as an argument. The
// view is copied into a one-element vector of its element type, the vector
// overload is applied and element 0 is returned.
// NOTE(review): listing lines 728 and 730 (per the cross-reference index of
// this page: the `length == 1` constraint and the declarator
// `cbit(simd_view<BaseTy, RegionTy> src) {`) are absent from this listing.
723 template <typename BaseTy, typename RegionTy>
724 __ESIMD_API std::enable_if_t<
725  std::is_integral<
726  typename simd_view<BaseTy, RegionTy>::element_type>::value &&
727  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) <= 4) &&
729  uint32_t>
731  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
732  simd<Ty, 1> Src = src;
733  simd<uint32_t, 1> Result = esimd::cbit(Src);
734  return Result[0];
735 }
736 
744 template <typename T, int N>
745 __ESIMD_API
746  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), simd<T, N>>
747  fbl(simd<T, N> src) {
748  return __esimd_fbl<T, N>(src.data());
749 }
750 
753 template <typename T>
754 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
755 fbl(T src) {
756  simd<T, 1> Src = src;
757  simd<T, 1> Result = esimd::fbl(Src);
758  return Result[0];
759 }
760 
// Scalar version of fbl, that takes simd_view object as an argument. The
// view is copied into a one-element vector of its element type, the vector
// overload is applied and element 0 is returned.
// NOTE(review): listing lines 771-773 (per the cross-reference index of this
// page: the `length == 1` constraint, the return type and the declarator
// `fbl(simd_view<BaseTy, RegionTy> src) {`) are absent from this listing.
766 template <typename BaseTy, typename RegionTy>
767 __ESIMD_API std::enable_if_t<
768  std::is_integral<
769  typename simd_view<BaseTy, RegionTy>::element_type>::value &&
770  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
774  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
775  simd<Ty, 1> Src = src;
776  simd<Ty, 1> Result = esimd::fbl(Src);
777  return Result[0];
778 }
779 
// fbh overload for signed 4-byte integral element types: forwards the raw
// vector data to the __esimd_sfbh intrinsic.
// NOTE(review): listing line 790 (the tail of the return type) is absent from
// this extracted listing; confirm against the original header.
787 template <typename T, int N>
788 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
789  std::is_signed<T>::value && (sizeof(T) == 4),
791 fbh(simd<T, N> src) {
792  return __esimd_sfbh<T, N>(src.data());
793 }
794 
// fbh overload for non-signed 4-byte integral element types: forwards the raw
// vector data to the __esimd_ufbh intrinsic.
// NOTE(review): listing line 804 (the tail of the return type) is absent from
// this extracted listing; confirm against the original header.
801 template <typename T, int N>
802 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
803  !std::is_signed<T>::value && (sizeof(T) == 4),
805 fbh(simd<T, N> src) {
806  return __esimd_ufbh<T, N>(src.data());
807 }
808 
811 template <typename T>
812 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
813 fbh(T src) {
814  simd<T, 1> Src = src;
815  simd<T, 1> Result = esimd::fbh(Src);
816  return Result[0];
817 }
818 
// Scalar version of fbh, that takes simd_view object as an argument. The
// view is copied into a one-element vector of its element type, the vector
// overload is applied and element 0 is returned.
// NOTE(review): listing lines 829-831 (per the cross-reference index of this
// page: the `length == 1` constraint, the return type and the declarator
// `fbh(simd_view<BaseTy, RegionTy> src) {`) are absent from this listing.
824 template <typename BaseTy, typename RegionTy>
825 __ESIMD_API std::enable_if_t<
826  std::is_integral<
827  typename simd_view<BaseTy, RegionTy>::element_type>::value &&
828  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
832  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
833  simd<Ty, 1> Src = src;
834  simd<Ty, 1> Result = esimd::fbh(Src);
835  return Result[0];
836 }
837 
839 
842 
// DP4A. Enabled only when T1..T4 are all dword types. T1 is the element type
// of the result; T2, T3 and T4 are the element types of src0, src1 and src2.
// NOTE(review): listing line 860 (the tail of the return type,
// `simd<T1, N>>` according to the cross-reference index of this page) is
// absent from this extracted listing.
855 template <typename T1, typename T2, typename T3, typename T4, int N,
856  class Sat = saturation_off_tag>
857 __ESIMD_API std::enable_if_t<
858  detail::is_dword_type<T1>::value && detail::is_dword_type<T2>::value &&
859  detail::is_dword_type<T3>::value && detail::is_dword_type<T4>::value,
861 dp4a(simd<T2, N> src0, simd<T3, N> src1, simd<T4, N> src2, Sat sat = {}) {
862  simd<T2, N> Src0 = src0;
863  simd<T3, N> Src1 = src1;
864  simd<T4, N> Src2 = src2;
865  simd<T1, N> Result;
866 
// Device path: the intrinsic is selected by the saturation tag, then by the
// signedness of the result type T1 (first letter of the uu/us/su/ss prefix)
// and of T2 (second letter).
867 #if defined(__SYCL_DEVICE_ONLY__)
868  if constexpr (std::is_same_v<Sat, saturation_off_tag>) {
869  if constexpr (std::is_unsigned<T1>::value) {
870  if constexpr (std::is_unsigned<T2>::value) {
871  Result = __esimd_uudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
872  Src2.data());
873  } else {
874  Result = __esimd_usdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
875  Src2.data());
876  }
877  } else {
878  if constexpr (std::is_unsigned<T2>::value) {
879  Result = __esimd_sudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
880  Src2.data());
881  } else {
882  Result = __esimd_ssdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
883  Src2.data());
884  }
885  }
886  } else {
// Same selection as above, using the saturating (`_sat`) intrinsics.
887  if constexpr (std::is_unsigned<T1>::value) {
888  if constexpr (std::is_unsigned<T2>::value) {
889  Result = __esimd_uudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
890  Src2.data());
891  } else {
892  Result = __esimd_usdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
893  Src2.data());
894  }
895  } else {
896  if constexpr (std::is_unsigned<T2>::value) {
897  Result = __esimd_sudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
898  Src2.data());
899  } else {
900  Result = __esimd_ssdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
901  Src2.data());
902  }
903  }
904  }
// Host path: a single generic intrinsic, followed by either saturation or a
// plain conversion to the result element type.
// NOTE(review): the intermediate is typed simd<T2, N> rather than
// simd<T1, N>, and the tag check is a plain `if` on a compile-time constant
// (so both branches are instantiated) - confirm both are intended.
905 #else
906  simd<T2, N> tmp =
907  __esimd_dp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(), Src2.data());
908 
909  if (std::is_same_v<Sat, saturation_on_tag>)
910  Result = esimd::saturate<T1>(tmp);
911  else
912  Result = convert<T1>(tmp);
913 #endif // __SYCL_DEVICE_ONLY__
914 
915  return Result;
916 }
917 
918 // reduction functions
919 namespace detail {
920 template <typename T0, typename T1, int SZ> struct esimd_apply_sum {
921  template <typename... T>
922  simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
923  return v1 + v2;
924  }
925 };
926 
927 template <typename T0, typename T1, int SZ> struct esimd_apply_prod {
928  template <typename... T>
929  simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
930  return v1 * v2;
931  }
932 };
933 
934 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
935  template <typename... T>
936  simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
937  if constexpr (std::is_floating_point<T1>::value) {
938  return __esimd_fmax<T1, SZ>(v1.data(), v2.data());
939  } else if constexpr (std::is_unsigned<T1>::value) {
940  return __esimd_umax<T1, SZ>(v1.data(), v2.data());
941  } else {
942  return __esimd_smax<T1, SZ>(v1.data(), v2.data());
943  }
944  }
945 };
946 
947 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
948  template <typename... T>
949  simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
950  if constexpr (std::is_floating_point<T1>::value) {
951  return __esimd_fmin<T1, SZ>(v1.data(), v2.data());
952  } else if constexpr (std::is_unsigned<T1>::value) {
953  return __esimd_umin<T1, SZ>(v1.data(), v2.data());
954  } else {
955  return __esimd_smin<T1, SZ>(v1.data(), v2.data());
956  }
957  }
958 };
959 
// Reduces one vector whose size is a power of two: the lower and upper halves
// are combined with OpType and the helper recurses on the half-sized result
// until a single element remains, which is returned.
// NOTE(review): listing line 962 (the declarator
// `T0 reduce_single(simd<T1, SZ> v) {` according to the cross-reference index
// of this page) is absent from this extracted listing.
// NOTE(review): the static_assert message below misspells "Invalid".
960 template <typename T0, typename T1, int SZ,
961  template <typename RT, typename T, int N> class OpType>
963  if constexpr (SZ == 1) {
964  return v[0];
965  } else {
966  static_assert(detail::isPowerOf2(SZ),
967  "Invaid input for reduce_single - the vector size must "
968  "be power of two.");
969  constexpr int N = SZ / 2;
970  simd<T0, N> tmp = OpType<T0, T1, N>()(v.template select<N, 1>(0),
971  v.template select<N, 1>(N));
// After the first step the intermediate already has element type T0.
972  return reduce_single<T0, T0, N, OpType>(tmp);
973  }
974 }
975 
// Reduces two vectors of sizes N1 and N2 into a single value with OpType.
// NOTE(review): listing line 978 (the declarator
// `T0 reduce_pair(simd<T1, N1> v1, simd<T1, N2> v2) {` according to the
// cross-reference index of this page) is absent from this extracted listing.
// NOTE(review): the static_assert message below misspells "Invalid".
976 template <typename T0, typename T1, int N1, int N2,
977  template <typename RT, typename T, int N> class OpType>
// Equal sizes: combine the two vectors and finish with reduce_single.
979  if constexpr (N1 == N2) {
980  simd<T0, N1> tmp = OpType<T0, T1, N1>()(v1, v2);
981  return reduce_single<T0, T0, N1, OpType>(tmp);
// v1 is shorter: combine it with the first N1 elements of v2, then recurse
// on that result paired with the remaining N2 - N1 elements of v2.
982  } else if constexpr (N1 < N2) {
983  simd<T0, N1> tmp1 = OpType<T0, T1, N1>()(v1, v2.template select<N1, 1>(0));
984  constexpr int N = N2 - N1;
985  using NT = simd<T0, N>;
986  NT tmp2 = convert<T0>(v2.template select<N, 1>(N1).read());
987  return reduce_pair<T0, T0, N1, N, OpType>(tmp1, tmp2);
// v1 is longer: fold its two halves together, then recurse with the halved
// vector and v2 converted to element type T0.
988  } else {
989  static_assert(detail::isPowerOf2(N1),
990  "Invaid input for reduce_pair - N1 must be power of two.");
991  constexpr int N = N1 / 2;
992  simd<T0, N> tmp = OpType<T0, T1, N>()(v1.template select<N, 1>(0),
993  v1.template select<N, 1>(N));
994  using NT = simd<T0, N2>;
995  NT tmp2 = convert<T0>(v2);
996  return reduce_pair<T0, T0, N, N2, OpType>(tmp, tmp2);
997  }
998 }
999 
1000 template <typename T0, typename T1, int SZ,
1001  template <typename RT, typename T, int N> class OpType>
1002 T0 reduce(simd<T1, SZ> v) {
1003  constexpr bool isPowerOf2 = detail::isPowerOf2(SZ);
1004  if constexpr (isPowerOf2) {
1005  return reduce_single<T0, T1, SZ, OpType>(v);
1006  } else {
1007  constexpr unsigned N1 = 1u << detail::log2<SZ>();
1008  constexpr unsigned N2 = SZ - N1;
1009 
1010  simd<T1, N1> v1 = v.template select<N1, 1>(0);
1011  simd<T1, N2> v2 = v.template select<N2, 1>(N1);
1012  return reduce_pair<T0, T1, N1, N2, OpType>(v1, v2);
1013  }
1014 };
1015 
1016 template <typename T0, typename T1, int SZ>
1017 ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd<T1, SZ> v) {
1018  using TT = detail::computation_type_t<simd<T1, SZ>>;
1019  using RT = typename TT::element_type;
1020  T0 retv = reduce<RT, T1, SZ, esimd_apply_sum>(v);
1021  return retv;
1022 }
1023 
1024 template <typename T0, typename T1, int SZ>
1025 ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd<T1, SZ> v) {
1026  using TT = detail::computation_type_t<simd<T1, SZ>>;
1027  using RT = typename TT::element_type;
1028  T0 retv = reduce<RT, T1, SZ, esimd_apply_prod>(v);
1029  return retv;
1030 }
1031 } // namespace detail
1033 
1041 template <typename T0, typename T1, int SZ>
1042 ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd<T1, SZ> v) {
1043  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_max>(v);
1044  return retv;
1045 }
1046 
1054 template <typename T0, typename T1, int SZ>
1055 ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd<T1, SZ> v) {
1056  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_min>(v);
1057  return retv;
1058 }
1059 
1073 // TODO 1) enforce BinaryOperation constraints 2) support std::minimum/maximum
1074 template <typename T0, typename T1, int SZ, typename BinaryOperation>
1075 ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd<T1, SZ> v, BinaryOperation op) {
1076  if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1077  std::plus<>>::value) {
1078  T0 retv = detail::sum<T0>(v);
1079  return retv;
1080  } else if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1081  std::multiplies<>>::value) {
1082  T0 retv = detail::prod<T0>(v);
1083  return retv;
1084  }
1085 }
1086 
1088 
1089 } // namespace ext::intel::esimd
1090 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
1091 } // namespace sycl
Definition: simd.hpp:1384
This class represents a reference to a sub-region of a base simd object.
Definition: simd_view.hpp:37
typename ShapeTy::element_type element_type
The element type of this class, which could be different from the element type of the base object typ...
Definition: simd_view.hpp:64
The main simd vector class.
Definition: simd.hpp:57
#define __SYCL_INLINE_VER_NAMESPACE(X)
__ESIMD_API std::enable_if_t< std::is_integral< typename simd_view< BaseTy, RegionTy >::element_type >::value &&(sizeof(typename simd_view< BaseTy, RegionTy >::element_type)==4) &&(simd_view< BaseTy, RegionTy >::length==1), typename simd_view< BaseTy, RegionTy >::element_type > fbl(simd_view< BaseTy, RegionTy > src)
Scalar version of fbl, that takes simd_view object as an argument, e.g.
Definition: math.hpp:772
__ESIMD_API std::enable_if_t< std::is_integral< typename simd_view< BaseTy, RegionTy >::element_type >::value &&(sizeof(typename simd_view< BaseTy, RegionTy >::element_type)<=4) &&(simd_view< BaseTy, RegionTy >::length==1), uint32_t > cbit(simd_view< BaseTy, RegionTy > src)
Scalar version of cbit, that takes simd_view object as an argument, e.g.
Definition: math.hpp:729
__ESIMD_API std::enable_if_t<(std::is_same_v< T, ushort >||std::is_same_v< T, uint >)&&(N > 0 &&N<=32), uint > ballot(simd< T, N > mask)
Compare source vector elements against zero and return a bitfield combining the comparison result.
Definition: math.hpp:684
__ESIMD_API std::enable_if_t< std::is_integral< typename simd_view< BaseTy, RegionTy >::element_type >::value &&(sizeof(typename simd_view< BaseTy, RegionTy >::element_type)==4) &&(simd_view< BaseTy, RegionTy >::length==1), typename simd_view< BaseTy, RegionTy >::element_type > fbh(simd_view< BaseTy, RegionTy > src)
Scalar version of fbh, that takes simd_view object as an argument, e.g.
Definition: math.hpp:830
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), simd_mask< N > > unpack_mask(uint src0)
Unpack an unsigned 32-bit integer value into a simd_mask.
Definition: math.hpp:660
__ESIMD_API T rnde(float src0, Sat sat={})
Scalar version.
Definition: math.hpp:562
__ESIMD_API T rndd(float src0, Sat sat={})
Scalar version.
Definition: math.hpp:545
#define __ESIMD_INTRINSIC_DEF(name)
Definition: math.hpp:510
ESIMD_INLINE RT ceil(float src0, Sat sat={})
"Ceiling" operation, scalar version - alias of rndu.
Definition: math.hpp:600
__ESIMD_API T rndu(float src0, Sat sat={})
Scalar version.
Definition: math.hpp:554
__ESIMD_API T rndz(float src0, Sat sat={})
Scalar version.
Definition: math.hpp:570
ESIMD_INLINE RT floor(float src0, Sat sat={})
"Floor" operation, scalar version - alias of rndd.
Definition: math.hpp:587
__ESIMD_API RT trunc(float src0, Sat sat={})
Round to integral value using the round to zero rounding mode (scalar version).
Definition: math.hpp:626
__ESIMD_API T pow(T src0, U src1, Sat sat={})
(scalar, scalar) version.
Definition: math.hpp:442
#define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:345
__ESIMD_API T sqrt(T src, Sat sat={})
Scalar version.
Definition: math.hpp:389
__ESIMD_API T cos(T src, Sat sat={})
Scalar version.
Definition: math.hpp:405
__ESIMD_API T sqrt_ieee(T src, Sat sat={})
Scalar version.
Definition: math.hpp:392
__ESIMD_API T log2(T src, Sat sat={})
Scalar version.
Definition: math.hpp:381
#define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:409
__ESIMD_API T inv(T src, Sat sat={})
Scalar version.
Definition: math.hpp:375
__ESIMD_API T exp2(T src, Sat sat={})
Scalar version.
Definition: math.hpp:385
__ESIMD_API T div_ieee(T src0, U src1, Sat sat={})
(scalar, scalar) version.
Definition: math.hpp:445
#define __ESIMD_EMATH_COND
Definition: math.hpp:367
__ESIMD_API T sin(T src, Sat sat={})
Scalar version.
Definition: math.hpp:401
__ESIMD_API T rsqrt(T src, Sat sat={})
Scalar version.
Definition: math.hpp:397
#define __ESIMD_EMATH_IEEE_COND
Definition: math.hpp:370
ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd< T1, SZ > v)
Performs 'maximum' operation reduction over elements of the input vector.
Definition: math.hpp:1041
ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd< T1, SZ > v, BinaryOperation op)
Performs reduction over elements of the input vector.
Definition: math.hpp:1074
__ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v< T0 >||std::is_same_v< T1, T0 >, simd< T0, SZ > > saturate(simd< T1, SZ > src)
Conversion of input vector elements of type T1 into vector of elements of type T0 with saturation.
Definition: math.hpp:72
ESIMD_NODEBUG ESIMD_INLINE T log(T src0, Sat sat={})
Definition: math.hpp:482
ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd< T1, SZ > v)
Performs 'minimum' operation reduction over elements of the input vector, that is,...
Definition: math.hpp:1054
ESIMD_NODEBUG ESIMD_INLINE T exp(T src0, Sat sat={})
Definition: math.hpp:497
__ESIMD_API std::enable_if_t< detail::is_dword_type< T1 >::value &&detail::is_dword_type< T2 >::value &&detail::is_dword_type< T3 >::value &&detail::is_dword_type< T4 >::value, simd< T1, N > > dp4a(simd< T2, N > src0, simd< T3, N > src1, simd< T4, N > src2, Sat sat={})
DP4A.
Definition: math.hpp:860
__ESIMD_API std::enable_if_t< detail::is_esimd_scalar< T1 >::value, std::remove_const_t< T1 > > abs(T1 src0)
Get absolute value (scalar version).
Definition: math.hpp:166
typename std::enable_if< B, T >::type enable_if_t
ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd< T1, SZ > v)
Definition: math.hpp:1024
T0 reduce_single(simd< T1, SZ > v)
Definition: math.hpp:961
ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd< T1, SZ > v)
Definition: math.hpp:1016
T0 reduce_pair(simd< T1, N1 > v1, simd< T1, N2 > v2)
Definition: math.hpp:977
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
Definition: common.hpp:79
__ESIMD_API std::enable_if_t<(N !=8 &&N !=16 &&N< 32), uint > pack_mask(simd_mask< N > src0)
pack_mask specialization when the number of elements N is not 8, 16 or 32.
Definition: math.hpp:668
float length(T p) __NOEXC
Definition: builtins.hpp:1032
Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
simd< _Tp, _Abi > min(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept