DPC++ Runtime
Runtime libraries for oneAPI DPC++
math.hpp
Go to the documentation of this file.
1 //==-------------- math.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement Explicit SIMD math APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
21 
22 #include <cstdint>
23 
25 namespace __ESIMD_NS {
26 
39 
42 
67 template <typename T0, typename T1, int SZ>
68 __ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v<T0> ||
69  std::is_same_v<T1, T0>,
72  if constexpr (detail::is_generic_floating_point_v<T0>)
73  return __esimd_sat<T0, T1, SZ>(src.data());
74  else if constexpr (detail::is_generic_floating_point_v<T1>) {
75  if constexpr (std::is_unsigned<T0>::value)
76  return __esimd_fptoui_sat<T0, T1, SZ>(src.data());
77  else
78  return __esimd_fptosi_sat<T0, T1, SZ>(src.data());
79  } else if constexpr (std::is_unsigned<T0>::value) {
80  if constexpr (std::is_unsigned<T1>::value)
81  return __esimd_uutrunc_sat<T0, T1, SZ>(src.data());
82  else
83  return __esimd_ustrunc_sat<T0, T1, SZ>(src.data());
84  } else {
85  if constexpr (std::is_signed<T1>::value)
86  return __esimd_sstrunc_sat<T0, T1, SZ>(src.data());
87  else
88  return __esimd_sutrunc_sat<T0, T1, SZ>(src.data());
89  }
90 }
91 
93 // abs
94 namespace detail {
95 
96 template <typename T0, typename T1, int SZ>
97 ESIMD_NODEBUG ESIMD_INLINE simd<T0, SZ>
98 __esimd_abs_common_internal(simd<T1, SZ> src0) {
99  simd<T1, SZ> Result = simd<T0, SZ>(__esimd_abs<T1, SZ>(src0.data()));
100  return Result;
101 }
102 
103 template <typename T0, typename T1>
104 ESIMD_NODEBUG
105  ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<T0>::value &&
106  detail::is_esimd_scalar<T1>::value,
107  std::remove_const_t<T0>>
108  __esimd_abs_common_internal(T1 src0) {
109  using TT0 = std::remove_const_t<T0>;
110  using TT1 = std::remove_const_t<T1>;
111 
112  simd<TT1, 1> Src0 = src0;
113  simd<TT0, 1> Result = __esimd_abs_common_internal<TT0>(Src0);
114  return Result[0];
115 }
116 } // namespace detail
118 
125 template <typename T0, typename T1, int SZ>
126 __ESIMD_API std::enable_if_t<
127  !std::is_same<std::remove_const_t<T0>, std::remove_const_t<T1>>::value,
128  simd<T0, SZ>>
130  return detail::__esimd_abs_common_internal<T0, T1, SZ>(src0.data());
131 }
132 
138 template <typename T0, typename T1>
139 __ESIMD_API std::enable_if_t<
140  !std::is_same<std::remove_const_t<T0>, std::remove_const_t<T1>>::value &&
141  detail::is_esimd_scalar<T0>::value &&
142  detail::is_esimd_scalar<T1>::value,
143  std::remove_const_t<T0>>
144 abs(T1 src0) {
145  return detail::__esimd_abs_common_internal<T0, T1>(src0);
146 }
147 
155 template <typename T1, int SZ> __ESIMD_API simd<T1, SZ> abs(simd<T1, SZ> src0) {
156  return detail::__esimd_abs_common_internal<T1, T1, SZ>(src0.data());
157 }
158 
165 template <typename T1>
166 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T1>::value,
167  std::remove_const_t<T1>>
168 abs(T1 src0) {
169  return detail::__esimd_abs_common_internal<T1, T1>(src0);
170 }
171 
181 template <typename T, int SZ, class Sat = saturation_off_tag>
182 __ESIMD_API simd<T, SZ> max(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
183  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
184 
185  if constexpr (std::is_floating_point<T>::value) {
186  auto Result = __esimd_fmax<T, SZ>(src0.data(), src1.data());
187  if constexpr (is_sat)
188  Result = __esimd_sat<T, T, SZ>(Result);
189  return simd<T, SZ>(Result);
190  } else if constexpr (std::is_unsigned<T>::value) {
191  auto Result = __esimd_umax<T, SZ>(src0.data(), src1.data());
192  if constexpr (is_sat)
193  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
194  return simd<T, SZ>(Result);
195  } else {
196  auto Result = __esimd_smax<T, SZ>(src0.data(), src1.data());
197  if constexpr (is_sat)
198  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
199  return simd<T, SZ>(Result);
200  }
201 }
202 
213 template <typename T, int SZ, class Sat = saturation_off_tag>
214 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>
215 max(simd<T, SZ> src0, T src1, Sat sat = {}) {
216  simd<T, SZ> Src1 = src1;
217  simd<T, SZ> Result = esimd::max<T>(src0, Src1, sat);
218  return Result;
219 }
220 
231 template <typename T, int SZ, class Sat = saturation_off_tag>
232 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>
233 max(T src0, simd<T, SZ> src1, Sat sat = {}) {
234  simd<T, SZ> Src0 = src0;
235  simd<T, SZ> Result = esimd::max<T>(Src0, src1, sat);
236  return Result;
237 }
238 
247 template <typename T, class Sat = saturation_off_tag>
248 ESIMD_NODEBUG
249  ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<T>::value, T>
250  max(T src0, T src1, Sat sat = {}) {
251  simd<T, 1> Src0 = src0;
252  simd<T, 1> Src1 = src1;
253  simd<T, 1> Result = esimd::max<T>(Src0, Src1, sat);
254  return Result[0];
255 }
256 
266 template <typename T, int SZ, class Sat = saturation_off_tag>
267 __ESIMD_API simd<T, SZ> min(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
268  constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
269 
270  if constexpr (std::is_floating_point<T>::value) {
271  auto Result = __esimd_fmin<T, SZ>(src0.data(), src1.data());
272  if constexpr (is_sat)
273  Result = __esimd_sat<T, T, SZ>(Result);
274  return simd<T, SZ>(Result);
275  } else if constexpr (std::is_unsigned<T>::value) {
276  auto Result = __esimd_umin<T, SZ>(src0.data(), src1.data());
277  if constexpr (is_sat)
278  Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
279  return simd<T, SZ>(Result);
280  } else {
281  auto Result = __esimd_smin<T, SZ>(src0.data(), src1.data());
282  if constexpr (is_sat)
283  Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
284  return simd<T, SZ>(Result);
285  }
286 }
287 
298 template <typename T, int SZ, class Sat = saturation_off_tag>
299 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>
300 min(simd<T, SZ> src0, T src1, Sat sat = {}) {
301  simd<T, SZ> Src1 = src1;
302  simd<T, SZ> Result = esimd::min<T>(src0, Src1, sat);
303  return Result;
304 }
305 
316 template <typename T, int SZ, class Sat = saturation_off_tag>
317 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value, simd<T, SZ>>
318 min(T src0, simd<T, SZ> src1, Sat sat = {}) {
319  simd<T, SZ> Src0 = src0;
320  simd<T, SZ> Result = esimd::min<T>(Src0, src1, sat);
321  return Result;
322 }
323 
332 template <typename T, class Sat = saturation_off_tag>
333 ESIMD_NODEBUG
334  ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<T>::value, T>
335  min(T src0, T src1, Sat sat = {}) {
336  simd<T, 1> Src0 = src0;
337  simd<T, 1> Src1 = src1;
338  simd<T, 1> Result = esimd::min<T>(Src0, Src1, sat);
339  return Result[0];
340 }
341 
343 
346 
347 #define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname) \
348  \
349  template <class T, int N, class Sat = saturation_off_tag, \
350  class = std::enable_if_t<COND>> \
351  __ESIMD_API simd<T, N> name(simd<T, N> src, Sat sat = {}) { \
352  __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res = \
353  __esimd_##iname<T, N>(src.data()); \
354  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
355  return res; \
356  else \
357  return esimd::saturate<T>(res); \
358  } \
359  \
360  \
361  template <typename T, class Sat = saturation_off_tag, \
362  class = std::enable_if_t<COND>> \
363  __ESIMD_API T name(T src, Sat sat = {}) { \
364  simd<T, 1> src_vec = src; \
365  simd<T, 1> res = name<T, 1>(src_vec, sat); \
366  return res[0]; \
367  }
368 
369 #define __ESIMD_EMATH_COND \
370  detail::is_generic_floating_point_v<T> && (sizeof(T) <= 4)
371 
372 #define __ESIMD_EMATH_IEEE_COND \
373  detail::is_generic_floating_point_v<T> && (sizeof(T) >= 4)
374 
378 
384 
388 
392 
395 
400 
404 
408 
409 #undef __ESIMD_UNARY_INTRINSIC_DEF
410 
411 #define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname) \
412  \
413  template <class T, int N, class U, class Sat = saturation_off_tag, \
414  class = std::enable_if_t<COND>> \
415  __ESIMD_API simd<T, N> name(simd<T, N> src0, simd<U, N> src1, \
416  Sat sat = {}) { \
417  using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>; \
418  RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data()); \
419  RawVecT res_raw = __esimd_##iname<T, N>(src0.data(), src1_raw_conv); \
420  if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
421  return res_raw; \
422  else \
423  return esimd::saturate<T>(simd<T, N>(res_raw)); \
424  } \
425  \
426  \
427  template <class T, int N, class U, class Sat = saturation_off_tag, \
428  class = std::enable_if_t<COND>> \
429  __ESIMD_API simd<T, N> name(simd<T, N> src0, U src1, Sat sat = {}) { \
430  return name<T, N, U>(src0, simd<U, N>(src1), sat); \
431  } \
432  \
433  \
434  template <class T, class U, class Sat = saturation_off_tag, \
435  class = std::enable_if_t<COND>> \
436  __ESIMD_API T name(T src0, U src1, Sat sat = {}) { \
437  simd<T, 1> res = name<T, 1, U>(simd<T, 1>(src0), simd<U, 1>(src1), sat); \
438  return res[0]; \
439  }
440 
445 
448 
449 #undef __ESIMD_BINARY_INTRINSIC_DEF
450 #undef __ESIMD_EMATH_COND
451 #undef __ESIMD_EMATH_IEEE_COND
452 
454 
457 
459 namespace detail {
460 // std::numbers::ln2_v<float> in c++20
461 constexpr float ln2 = 0.69314718f;
462 // std::numbers::log2e_v<float> in c++20
463 constexpr float log2e = 1.442695f;
464 } // namespace detail
466 
471 template <class T, int SZ, class Sat = saturation_off_tag>
472 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> log(simd<T, SZ> src0, Sat sat = {}) {
473  using CppT = __ESIMD_DNS::__cpp_t<T>;
474  simd<T, SZ> Result =
475  esimd::log2<T, SZ, saturation_off_tag>(src0) * detail::ln2;
476 
477  if constexpr (std::is_same_v<Sat, saturation_off_tag>)
478  return Result;
479  else
480  return esimd::saturate<T>(Result);
481 }
482 
483 template <class T, class Sat = saturation_off_tag>
484 ESIMD_NODEBUG ESIMD_INLINE T log(T src0, Sat sat = {}) {
485  return esimd::log<T, 1>(src0, sat)[0];
486 }
487 
492 template <class T, int SZ, class Sat = saturation_off_tag>
493 ESIMD_NODEBUG ESIMD_INLINE simd<T, SZ> exp(simd<T, SZ> src0, Sat sat = {}) {
494  using CppT = __ESIMD_DNS::__cpp_t<T>;
495  return esimd::exp2<T, SZ>(src0 * detail::log2e, sat);
496 }
497 
498 template <class T, class Sat = saturation_off_tag>
499 ESIMD_NODEBUG ESIMD_INLINE T exp(T src0, Sat sat = {}) {
500  return esimd::exp<T, 1>(src0, sat)[0];
501 }
502 
504 
507 
509 // Rounding intrinsics.
511 
512 #define __ESIMD_INTRINSIC_DEF(name) \
513  \
514  \
515  \
517  \
518  \
519  \
520  template <typename T, int SZ, class Sat = __ESIMD_NS::saturation_off_tag> \
521  __ESIMD_API __ESIMD_NS::simd<T, SZ> name(__ESIMD_NS::simd<float, SZ> src0, \
522  Sat sat = {}) { \
523  __ESIMD_NS::simd<float, SZ> Result = __esimd_##name<SZ>(src0.data()); \
524  if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>) \
525  return Result; \
526  else if constexpr (!std::is_same_v<float, T>) { \
527  auto RawRes = __ESIMD_NS::saturate<float>(Result).data(); \
528  return __ESIMD_DNS::convert_vector<T, float, SZ>(std::move(RawRes)); \
529  } else { \
530  return __ESIMD_NS::saturate<T>(Result); \
531  } \
532  } \
533  \
534  template <typename T, class Sat = __ESIMD_NS::saturation_off_tag> \
535  __ESIMD_API T name(float src0, Sat sat = {}) { \
536  __ESIMD_NS::simd<float, 1> Src0 = src0; \
537  __ESIMD_NS::simd<T, 1> Result = name<T>(Src0, sat); \
538  return Result[0]; \
539  }
540 
549 
558 
566 
574 
575 #undef __ESIMD_INTRINSIC_DEF
576 
580 
582 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
583 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
584 floor(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
585  return esimd::rndd<RT, SZ>(src0, sat);
586 }
587 
589 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
590 ESIMD_INLINE RT floor(float src0, Sat sat = {}) {
591  return esimd::rndd<RT, 1U>(src0, sat)[0];
592 }
593 
595 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
596 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
597 ceil(const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
598  return esimd::rndu<RT, SZ>(src0, sat);
599 }
600 
602 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
603 ESIMD_INLINE RT ceil(float src0, Sat sat = {}) {
604  return esimd::rndu<RT, 1U>(src0, sat);
605 }
606 
615 template <typename RT, int SZ, class Sat = __ESIMD_NS::saturation_off_tag>
616 __ESIMD_API __ESIMD_NS::simd<RT, SZ>
617 trunc(const __ESIMD_NS::simd<float, SZ> &src0, Sat sat = {}) {
618  return esimd::rndz<RT, SZ>(src0, sat);
619 }
620 
628 template <typename RT, class Sat = __ESIMD_NS::saturation_off_tag>
629 __ESIMD_API RT trunc(float src0, Sat sat = {}) {
630  return esimd::rndz<RT, 1U>(src0, sat)[0];
631 }
632 
634 
637 
646 template <int N>
647 ESIMD_NODEBUG
648  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), uint>
649  pack_mask(simd_mask<N> src0) {
650  return __esimd_pack_mask<N>(src0.data());
651 }
652 
660 template <int N>
661 ESIMD_NODEBUG
662  ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), simd_mask<N>>
663  unpack_mask(uint src0) {
664  return __esimd_unpack_mask<N>(src0);
665 }
666 
669 template <int N>
670 __ESIMD_API std::enable_if_t<(N != 8 && N != 16 && N < 32), uint>
671 pack_mask(simd_mask<N> src0) {
672  simd_mask<(N < 8 ? 8 : N < 16 ? 16 : 32)> src_0 = 0;
673  src_0.template select<N, 1>() = src0.template bit_cast_view<ushort>();
674  return esimd::pack_mask(src_0);
675 }
676 
683 template <typename T, int N>
684 __ESIMD_API
685  std::enable_if_t<detail::is_type<T, ushort, uint>() && (N > 0 && N <= 32),
687  ballot(simd<T, N> mask) {
688  simd_mask<N> cmp = (mask != 0);
689  if constexpr (N == 8 || N == 16 || N == 32) {
690  return __esimd_pack_mask<N>(cmp.data());
691  } else {
692  constexpr int N1 = (N <= 8 ? 8 : N <= 16 ? 16 : 32);
693  simd<uint16_t, N1> res = 0;
694  res.template select<N, 1>() = cmp.data();
695  return __esimd_pack_mask<N1>(res.data());
696  }
697 }
698 
703 template <typename T, int N>
704 ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
705  std::is_integral<T>::value && (sizeof(T) <= 4), simd<uint32_t, N>>
706 cbit(simd<T, N> src) {
707  return __esimd_cbit<T, N>(src.data());
708 }
709 
712 template <typename T>
713 __ESIMD_API
714  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) <= 4), uint32_t>
715  cbit(T src) {
716  simd<T, 1> Src = src;
717  simd<uint32_t, 1> Result = esimd::cbit(Src);
718  return Result[0];
719 }
720 
725 template <typename BaseTy, typename RegionTy>
726 __ESIMD_API std::enable_if_t<
727  std::is_integral<
728  typename simd_view<BaseTy, RegionTy>::element_type>::value &&
729  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) <= 4) &&
731  uint32_t>
733  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
734  simd<Ty, 1> Src = src;
735  simd<uint32_t, 1> Result = esimd::cbit(Src);
736  return Result[0];
737 }
738 
746 template <typename T, int N>
747 __ESIMD_API
748  std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), simd<T, N>>
749  fbl(simd<T, N> src) {
750  return __esimd_fbl<T, N>(src.data());
751 }
752 
755 template <typename T>
756 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
757 fbl(T src) {
758  simd<T, 1> Src = src;
759  simd<T, 1> Result = esimd::fbl(Src);
760  return Result[0];
761 }
762 
768 template <typename BaseTy, typename RegionTy>
769 __ESIMD_API std::enable_if_t<
770  std::is_integral<
771  typename simd_view<BaseTy, RegionTy>::element_type>::value &&
772  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
776  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
777  simd<Ty, 1> Src = src;
778  simd<Ty, 1> Result = esimd::fbl(Src);
779  return Result[0];
780 }
781 
789 template <typename T, int N>
790 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
791  std::is_signed<T>::value && (sizeof(T) == 4),
793 fbh(simd<T, N> src) {
794  return __esimd_sfbh<T, N>(src.data());
795 }
796 
803 template <typename T, int N>
804 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
805  !std::is_signed<T>::value && (sizeof(T) == 4),
807 fbh(simd<T, N> src) {
808  return __esimd_ufbh<T, N>(src.data());
809 }
810 
813 template <typename T>
814 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (sizeof(T) == 4), T>
815 fbh(T src) {
816  simd<T, 1> Src = src;
817  simd<T, 1> Result = esimd::fbh(Src);
818  return Result[0];
819 }
820 
826 template <typename BaseTy, typename RegionTy>
827 __ESIMD_API std::enable_if_t<
828  std::is_integral<
829  typename simd_view<BaseTy, RegionTy>::element_type>::value &&
830  (sizeof(typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
834  using Ty = typename simd_view<BaseTy, RegionTy>::element_type;
835  simd<Ty, 1> Src = src;
836  simd<Ty, 1> Result = esimd::fbh(Src);
837  return Result[0];
838 }
839 
841 
844 
857 template <typename T1, typename T2, typename T3, typename T4, int N,
858  class Sat = saturation_off_tag>
859 __ESIMD_API std::enable_if_t<
860  detail::is_dword_type<T1>::value && detail::is_dword_type<T2>::value &&
861  detail::is_dword_type<T3>::value && detail::is_dword_type<T4>::value,
863 dp4a(simd<T2, N> src0, simd<T3, N> src1, simd<T4, N> src2, Sat sat = {}) {
864  simd<T2, N> Src0 = src0;
865  simd<T3, N> Src1 = src1;
866  simd<T4, N> Src2 = src2;
867  simd<T1, N> Result;
868 
869 #if defined(__SYCL_DEVICE_ONLY__)
870  if constexpr (std::is_same_v<Sat, saturation_off_tag>) {
871  if constexpr (std::is_unsigned<T1>::value) {
872  if constexpr (std::is_unsigned<T2>::value) {
873  Result = __esimd_uudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
874  Src2.data());
875  } else {
876  Result = __esimd_usdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
877  Src2.data());
878  }
879  } else {
880  if constexpr (std::is_unsigned<T2>::value) {
881  Result = __esimd_sudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
882  Src2.data());
883  } else {
884  Result = __esimd_ssdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
885  Src2.data());
886  }
887  }
888  } else {
889  if constexpr (std::is_unsigned<T1>::value) {
890  if constexpr (std::is_unsigned<T2>::value) {
891  Result = __esimd_uudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
892  Src2.data());
893  } else {
894  Result = __esimd_usdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
895  Src2.data());
896  }
897  } else {
898  if constexpr (std::is_unsigned<T2>::value) {
899  Result = __esimd_sudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
900  Src2.data());
901  } else {
902  Result = __esimd_ssdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
903  Src2.data());
904  }
905  }
906  }
907 #else
908  simd<T2, N> tmp =
909  __esimd_dp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(), Src2.data());
910 
911  if (std::is_same_v<Sat, saturation_on_tag>)
912  Result = esimd::saturate<T1>(tmp);
913  else
914  Result = convert<T1>(tmp);
915 #endif // __SYCL_DEVICE_ONLY__
916 
917  return Result;
918 }
919 
920 // reduction functions
921 namespace detail {
922 template <typename T0, typename T1, int SZ> struct esimd_apply_sum {
923  template <typename... T>
924  simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
925  return v1 + v2;
926  }
927 };
928 
929 template <typename T0, typename T1, int SZ> struct esimd_apply_prod {
930  template <typename... T>
931  simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
932  return v1 * v2;
933  }
934 };
935 
936 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
937  template <typename... T>
938  simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
939  if constexpr (std::is_floating_point<T1>::value) {
940  return __esimd_fmax<T1, SZ>(v1.data(), v2.data());
941  } else if constexpr (std::is_unsigned<T1>::value) {
942  return __esimd_umax<T1, SZ>(v1.data(), v2.data());
943  } else {
944  return __esimd_smax<T1, SZ>(v1.data(), v2.data());
945  }
946  }
947 };
948 
949 template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
950  template <typename... T>
951  simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
952  if constexpr (std::is_floating_point<T1>::value) {
953  return __esimd_fmin<T1, SZ>(v1.data(), v2.data());
954  } else if constexpr (std::is_unsigned<T1>::value) {
955  return __esimd_umin<T1, SZ>(v1.data(), v2.data());
956  } else {
957  return __esimd_smin<T1, SZ>(v1.data(), v2.data());
958  }
959  }
960 };
961 
962 template <typename T0, typename T1, int SZ,
963  template <typename RT, typename T, int N> class OpType>
965  if constexpr (SZ == 1) {
966  return v[0];
967  } else {
968  static_assert(detail::isPowerOf2(SZ),
969  "Invaid input for reduce_single - the vector size must "
970  "be power of two.");
971  constexpr int N = SZ / 2;
972  simd<T0, N> tmp = OpType<T0, T1, N>()(v.template select<N, 1>(0),
973  v.template select<N, 1>(N));
974  return reduce_single<T0, T0, N, OpType>(tmp);
975  }
976 }
977 
978 template <typename T0, typename T1, int N1, int N2,
979  template <typename RT, typename T, int N> class OpType>
981  if constexpr (N1 == N2) {
982  simd<T0, N1> tmp = OpType<T0, T1, N1>()(v1, v2);
983  return reduce_single<T0, T0, N1, OpType>(tmp);
984  } else if constexpr (N1 < N2) {
985  simd<T0, N1> tmp1 = OpType<T0, T1, N1>()(v1, v2.template select<N1, 1>(0));
986  constexpr int N = N2 - N1;
987  using NT = simd<T0, N>;
988  NT tmp2 = convert<T0>(v2.template select<N, 1>(N1).read());
989  return reduce_pair<T0, T0, N1, N, OpType>(tmp1, tmp2);
990  } else {
991  static_assert(detail::isPowerOf2(N1),
992  "Invaid input for reduce_pair - N1 must be power of two.");
993  constexpr int N = N1 / 2;
994  simd<T0, N> tmp = OpType<T0, T1, N>()(v1.template select<N, 1>(0),
995  v1.template select<N, 1>(N));
996  using NT = simd<T0, N2>;
997  NT tmp2 = convert<T0>(v2);
998  return reduce_pair<T0, T0, N, N2, OpType>(tmp, tmp2);
999  }
1000 }
1001 
1002 template <typename T0, typename T1, int SZ,
1003  template <typename RT, typename T, int N> class OpType>
1004 T0 reduce(simd<T1, SZ> v) {
1005  constexpr bool isPowerOf2 = detail::isPowerOf2(SZ);
1006  if constexpr (isPowerOf2) {
1007  return reduce_single<T0, T1, SZ, OpType>(v);
1008  } else {
1009  constexpr unsigned N1 = 1u << detail::log2<SZ>();
1010  constexpr unsigned N2 = SZ - N1;
1011 
1012  simd<T1, N1> v1 = v.template select<N1, 1>(0);
1013  simd<T1, N2> v2 = v.template select<N2, 1>(N1);
1014  return reduce_pair<T0, T1, N1, N2, OpType>(v1, v2);
1015  }
1016 };
1017 
1018 template <typename T0, typename T1, int SZ>
1019 ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd<T1, SZ> v) {
1020  using TT = detail::computation_type_t<simd<T1, SZ>>;
1021  using RT = typename TT::element_type;
1022  T0 retv = reduce<RT, T1, SZ, esimd_apply_sum>(v);
1023  return retv;
1024 }
1025 
1026 template <typename T0, typename T1, int SZ>
1027 ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd<T1, SZ> v) {
1028  using TT = detail::computation_type_t<simd<T1, SZ>>;
1029  using RT = typename TT::element_type;
1030  T0 retv = reduce<RT, T1, SZ, esimd_apply_prod>(v);
1031  return retv;
1032 }
1033 } // namespace detail
1035 
1043 template <typename T0, typename T1, int SZ>
1044 ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd<T1, SZ> v) {
1045  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_max>(v);
1046  return retv;
1047 }
1048 
1056 template <typename T0, typename T1, int SZ>
1057 ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd<T1, SZ> v) {
1058  T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_min>(v);
1059  return retv;
1060 }
1061 
1075 // TODO 1) enforce BinaryOperation constraints 2) support std::minimum/maximum
1076 template <typename T0, typename T1, int SZ, typename BinaryOperation>
1077 ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd<T1, SZ> v, BinaryOperation op) {
1078  if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1079  std::plus<>>::value) {
1080  T0 retv = detail::sum<T0>(v);
1081  return retv;
1082  } else if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1083  std::multiplies<>>::value) {
1084  T0 retv = detail::prod<T0>(v);
1085  return retv;
1086  }
1087 }
1088 
1090 
1091 } // namespace __ESIMD_NS
1092 } // __SYCL_INLINE_NAMESPACE(cl)
cl::sycl::ext::intel::esimd::saturate
__ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v< T0 >||std::is_same_v< T1, T0 >, simd< T0, SZ > > saturate(simd< T1, SZ > src)
Conversion of input vector elements of type T1 into vector of elements of type T0 with saturation.
Definition: math.hpp:71
host_util.hpp
__ESIMD_BINARY_INTRINSIC_DEF
#define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:411
cl::sycl::ext::intel::esimd::div_ieee
__ESIMD_API T div_ieee(T src0, U src1, Sat sat={})
(scalar, scalar) version.
Definition: math.hpp:447
simd_mask
Definition: simd.hpp:1029
cl::sycl::ext::intel::esimd::detail::reduce_single
T0 reduce_single(simd< T1, SZ > v)
Definition: math.hpp:963
T
__ESIMD_EMATH_COND
#define __ESIMD_EMATH_COND
Definition: math.hpp:369
cl::sycl::ext::intel::esimd::detail::esimd_apply_reduced_min
Definition: math.hpp:948
cl::sycl::ext::intel::esimd::reduce
ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd< T1, SZ > v, BinaryOperation op)
Performs reduction over elements of the input vector.
Definition: math.hpp:1076
simd_view.hpp
cl::sycl::ext::intel::esimd::floor
ESIMD_INLINE RT floor(float src0, Sat sat={})
"Floor" operation, scalar version - alias of rndd.
Definition: math.hpp:589
cl::sycl::ext::intel::esimd::cos
__ESIMD_API T cos(T src, Sat sat={})
Scalar version.
Definition: math.hpp:407
cl::sycl::ext::intel::esimd::rndu
__ESIMD_API T rndu(float src0, Sat sat={})
Scalar version.
Definition: math.hpp:556
cl::sycl::ext::intel::esimd::hmax
ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd< T1, SZ > v)
ESIMD_DETAIL.
Definition: math.hpp:1043
__ESIMD_EMATH_IEEE_COND
#define __ESIMD_EMATH_IEEE_COND
Definition: math.hpp:372
cl::sycl::ext::intel::esimd::abs
__ESIMD_API std::enable_if_t< detail::is_esimd_scalar< T1 >::value, std::remove_const_t< T1 > > abs(T1 src0)
Get absolute value (scalar version).
Definition: math.hpp:168
cl::sycl::ext::intel::esimd::rndd
__ESIMD_API T rndd(float src0, Sat sat={})
Scalar version.
Definition: math.hpp:547
cl::sycl::ext::intel::esimd::exp2
__ESIMD_API T exp2(T src, Sat sat={})
Scalar version.
Definition: math.hpp:387
max
simd< _Tp, _Abi > max(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
cl::sycl::ext::intel::esimd::detail::esimd_apply_sum
Definition: math.hpp:921
util.hpp
cl::sycl::ext::intel::esimd::unpack_mask
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), simd_mask< N > > unpack_mask(uint src0)
Unpack an unsigned 32-bit integer value into a simd_mask.
Definition: math.hpp:662
cl::sycl::ext::intel::esimd::rnde
__ESIMD_API T rnde(float src0, Sat sat={})
Scalar version.
Definition: math.hpp:564
cl::sycl::ext::intel::esimd::pow
__ESIMD_API T pow(T src0, U src1, Sat sat={})
(scalar, scalar) version.
Definition: math.hpp:444
__ESIMD_UNARY_INTRINSIC_DEF
#define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname)
Definition: math.hpp:347
cl::sycl::ext::intel::esimd::detail::sum
ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd< T1, SZ > v)
Definition: math.hpp:1018
cl::sycl::length
float length(T p) __NOEXC
Definition: builtins.hpp:1032
cl::sycl::ext::intel::esimd::fbl
__ESIMD_API std::enable_if_t< std::is_integral< typename simd_view< BaseTy, RegionTy >::element_type >::value &&(sizeof(typename simd_view< BaseTy, RegionTy >::element_type)==4) &&(simd_view< BaseTy, RegionTy >::length==1), typename simd_view< BaseTy, RegionTy >::element_type > fbl(simd_view< BaseTy, RegionTy > src)
Scalar version of fbl, that takes simd_view object as an argument, e.g.
Definition: math.hpp:774
cl::sycl::ext::intel::esimd::simd_view
This class represents a reference to a sub-region of a base simd object.
Definition: types.hpp:32
__ESIMD_INTRINSIC_DEF
#define __ESIMD_INTRINSIC_DEF(name)
Definition: math.hpp:512
cl::sycl::ext::intel::esimd::cbit
__ESIMD_API std::enable_if_t< std::is_integral< typename simd_view< BaseTy, RegionTy >::element_type >::value &&(sizeof(typename simd_view< BaseTy, RegionTy >::element_type)<=4) &&(simd_view< BaseTy, RegionTy >::length==1), uint32_t > cbit(simd_view< BaseTy, RegionTy > src)
Scalar version of cbit, that takes simd_view object as an argument, e.g.
Definition: math.hpp:731
cl::sycl::ext::intel::esimd::simd
The main simd vector class.
Definition: types.hpp:31
cl::sycl::ext::intel::esimd::sqrt_ieee
__ESIMD_API T sqrt_ieee(T src, Sat sat={})
Scalar version.
Definition: math.hpp:394
math_intrin.hpp
cl::sycl::ext::intel::esimd::inv
__ESIMD_API T inv(T src, Sat sat={})
Scalar version.
Definition: math.hpp:377
cl::sycl::ext::intel::esimd::hmin
ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd< T1, SZ > v)
Performs 'minimum' operation reduction over elements of the input vector, that is,...
Definition: math.hpp:1056
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
cl::sycl::ext::intel::esimd::detail::reduce_pair
T0 reduce_pair(simd< T1, N1 > v1, simd< T1, N2 > v2)
Definition: math.hpp:979
simd.hpp
cl::sycl::ext::intel::esimd::pack_mask
__ESIMD_API std::enable_if_t<(N !=8 &&N !=16 &&N< 32), uint > pack_mask(simd_mask< N > src0)
pack_mask specialization when the number of elements N is not 8, 16 or 32.
Definition: math.hpp:670
cl::sycl::ext::intel::esimd::log2
__ESIMD_API T log2(T src, Sat sat={})
Scalar version.
Definition: math.hpp:383
types.hpp
cl::sycl::ext::intel::esimd::rsqrt
__ESIMD_API T rsqrt(T src, Sat sat={})
Scalar version.
Definition: math.hpp:399
cl::sycl::ext::intel::esimd::detail::esimd_apply_reduced_max
Definition: math.hpp:935
cl::sycl::ext::intel::esimd::detail::esimd_apply_prod
Definition: math.hpp:928
cl::sycl::ext::intel::esimd::trunc
__ESIMD_API RT trunc(float src0, Sat sat={})
Round to integral value using the round to zero rounding mode (scalar version).
Definition: math.hpp:628
cl::sycl::ext::intel::esimd::rndz
__ESIMD_API T rndz(float src0, Sat sat={})
Scalar version.
Definition: math.hpp:572
cl::sycl::ext::intel::esimd::ceil
ESIMD_INLINE RT ceil(float src0, Sat sat={})
"Ceiling" operation, scalar version - alias of rndu.
Definition: math.hpp:602
simd
Definition: simd.hpp:1027
cl::sycl::ext::intel::esimd::fbh
__ESIMD_API std::enable_if_t< std::is_integral< typename simd_view< BaseTy, RegionTy >::element_type >::value &&(sizeof(typename simd_view< BaseTy, RegionTy >::element_type)==4) &&(simd_view< BaseTy, RegionTy >::length==1), typename simd_view< BaseTy, RegionTy >::element_type > fbh(simd_view< BaseTy, RegionTy > src)
Scalar version of fbh, that takes simd_view object as an argument, e.g.
Definition: math.hpp:832
cl::sycl::detail::pi
Definition: backend_traits_opencl.hpp:193
common.hpp
operators.hpp
cl::sycl::ext::intel::esimd::sin
__ESIMD_API T sin(T src, Sat sat={})
Scalar version.
Definition: math.hpp:403
cl::sycl::ext::intel::esimd::detail::prod
ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd< T1, SZ > v)
Definition: math.hpp:1026
cl::sycl::ext::intel::esimd::dp4a
__ESIMD_API std::enable_if_t< detail::is_dword_type< T1 >::value &&detail::is_dword_type< T2 >::value &&detail::is_dword_type< T3 >::value &&detail::is_dword_type< T4 >::value, simd< T1, N > > dp4a(simd< T2, N > src0, simd< T3, N > src1, simd< T4, N > src2, Sat sat={})
DP4A.
Definition: math.hpp:862
cl::sycl::ext::intel::esimd::exp
ESIMD_NODEBUG ESIMD_INLINE T exp(T src0, Sat sat={})
Definition: math.hpp:499
cl::sycl::ext::intel::esimd::simd_view::element_type
typename ShapeTy::element_type element_type
The element type of this class, which could be different from the element type of the base object typ...
Definition: simd_view.hpp:63
cl::sycl::detail::enable_if_t
typename std::enable_if< B, T >::type enable_if_t
Definition: stl_type_traits.hpp:24
cl::sycl::ext::intel::esimd::sqrt
__ESIMD_API T sqrt(T src, Sat sat={})
Scalar version.
Definition: math.hpp:391
cl::sycl::ext::intel::esimd::uint
unsigned int uint
Definition: common.hpp:84
min
simd< _Tp, _Abi > min(const simd< _Tp, _Abi > &, const simd< _Tp, _Abi > &) noexcept
cl::sycl::ext::intel::esimd::ballot
__ESIMD_API std::enable_if_t< detail::is_type< T, ushort, uint >) &&(N > 0 &&N<=32), uint > ballot(simd< T, N > mask)
Compare source vector elements against zero and return a bitfield combining the comparison result.
Definition: math.hpp:686
cl::sycl::ext::intel::esimd::log
ESIMD_NODEBUG ESIMD_INLINE T log(T src0, Sat sat={})
Definition: math.hpp:484
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12