26 namespace ext::intel::esimd {
/// Saturating conversion dispatcher. The function name and parameter list sit
/// on lines not visible here (NOTE(review): presumably `saturate` — confirm).
///
/// Participates in overload resolution only when T0 is not a generic
/// floating-point type, or when T1 is the same type as T0.
///
/// The intrinsic is selected at compile time from the categories of T0 and T1:
///   - T0 floating point              -> __esimd_sat
///   - T1 floating point, T0 unsigned -> __esimd_fptoui_sat
///   - T1 floating point (remaining)  -> __esimd_fptosi_sat
///   - T0 unsigned, T1 unsigned       -> __esimd_uutrunc_sat
///   - T0 unsigned (remaining)        -> __esimd_ustrunc_sat
///   - remaining T0, T1 signed        -> __esimd_sstrunc_sat
///   - remaining T0 (remaining T1)    -> __esimd_sutrunc_sat
/// The "remaining" rows assume `else` lines that are not visible here.
/// Every branch forwards the raw storage obtained from `src.data()`.
68 template <
typename T0,
typename T1,
int SZ>
69 __ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v<T0> ||
70 std::is_same_v<T1, T0>,
73 if constexpr (detail::is_generic_floating_point_v<T0>)
74 return __esimd_sat<T0, T1, SZ>(src.data());
75 else if constexpr (detail::is_generic_floating_point_v<T1>) {
76 if constexpr (std::is_unsigned<T0>::value)
77 return __esimd_fptoui_sat<T0, T1, SZ>(src.data());
79 return __esimd_fptosi_sat<T0, T1, SZ>(src.data());
80 }
else if constexpr (std::is_unsigned<T0>::value) {
81 if constexpr (std::is_unsigned<T1>::value)
82 return __esimd_uutrunc_sat<T0, T1, SZ>(src.data());
84 return __esimd_ustrunc_sat<T0, T1, SZ>(src.data());
86 if constexpr (std::is_signed<T1>::value)
87 return __esimd_sstrunc_sat<T0, T1, SZ>(src.data());
89 return __esimd_sutrunc_sat<T0, T1, SZ>(src.data());
/// Vector helper parameterized on result type TRes, argument type TArg and
/// length SZ. Its signature and the computation of `Result` are on lines not
/// visible here; the visible tail converts `Result` to element type TRes.
/// NOTE(review): presumably the simd form of __esimd_abs_common_internal — confirm.
97 template <
typename TRes,
typename TArg,
int SZ>
101 return convert<TRes>(Result);
/// Scalar form of __esimd_abs_common_internal.
///
/// Enabled only when both TRes and TArg satisfy detail::is_esimd_scalar.
/// The work is delegated to another __esimd_abs_common_internal overload on a
/// one-element vector, and lane 0 of the TRes-converted result is returned.
/// `Src0` is declared on a line not visible here (presumably a simd<TArg, 1>
/// built from `src0` — confirm).
104 template <
typename TRes,
typename TArg>
106 ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<TRes>::value &&
107 detail::is_esimd_scalar<TArg>::value,
109 __esimd_abs_common_internal(TArg src0) {
111 simd<TArg, 1> Result = __esimd_abs_common_internal<TArg>(Src0);
112 return convert<TRes>(Result)[0];
/// Vector overload whose result element type TRes differs from the argument
/// element type TArg once const is stripped (part of the constraint shown
/// below; the rest of the declaration is not visible here).
/// Forwards the raw storage of `src0` to
/// detail::__esimd_abs_common_internal<TRes, TArg, SZ>.
/// NOTE(review): the function name is not visible; presumably `abs` — confirm.
123 template <
typename TRes,
typename TArg,
int SZ>
125 !std::is_same<std::remove_const_t<TRes>, std::remove_const_t<TArg>>::value,
128 return detail::__esimd_abs_common_internal<TRes, TArg, SZ>(src0.data());
/// Scalar overload for the case where TRes and TArg differ after removing
/// const and both satisfy detail::is_esimd_scalar.
/// Returns std::remove_const_t<TRes>, produced by the scalar form of
/// detail::__esimd_abs_common_internal<TRes, TArg>.
/// NOTE(review): the function name is not visible; presumably `abs` — confirm.
136 template <
typename TRes,
typename TArg>
137 __ESIMD_API std::enable_if_t<!std::is_same<std::remove_const_t<TRes>,
138 std::remove_const_t<TArg>>::value &&
139 detail::is_esimd_scalar<TRes>::value &&
140 detail::is_esimd_scalar<TArg>::value,
141 std::remove_const_t<TRes>>
143 return detail::__esimd_abs_common_internal<TRes, TArg>(src0);
154 return detail::__esimd_abs_common_internal<T1, T1, SZ>(src0.data());
/// Scalar overload where argument and result share the type T1.
/// Enabled only when T1 satisfies detail::is_esimd_scalar; the return type is
/// std::remove_const_t<T1>. Delegates to
/// detail::__esimd_abs_common_internal<T1, T1>.
/// NOTE(review): the function name is not visible; presumably `abs` — confirm.
163 template <
typename T1>
164 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T1>::value,
165 std::remove_const_t<T1>>
167 return detail::__esimd_abs_common_internal<T1, T1>(src0);
/// Vector maximum of `src0` and `src1` with optional saturation.
/// (The signature is on lines not visible here; the name `max` is inferred
/// from the esimd::max<T> calls made by the sibling overloads.)
///
/// The intrinsic depends on the element type T:
///   - floating point -> __esimd_fmax, then __esimd_sat when saturating
///   - unsigned       -> __esimd_umax, then __esimd_uutrunc_sat when saturating
///   - remaining      -> __esimd_smax, then __esimd_sstrunc_sat when saturating
/// Saturation is applied only when Sat is saturation_on_tag (`is_sat`).
/// The return statements are on lines not visible here.
179 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
181 constexpr
bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
183 if constexpr (std::is_floating_point<T>::value) {
184 auto Result = __esimd_fmax<T, SZ>(src0.data(), src1.data());
185 if constexpr (is_sat)
186 Result = __esimd_sat<T, T, SZ>(Result);
188 }
else if constexpr (std::is_unsigned<T>::value) {
189 auto Result = __esimd_umax<T, SZ>(src0.data(), src1.data());
190 if constexpr (is_sat)
191 Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
194 auto Result = __esimd_smax<T, SZ>(src0.data(), src1.data());
195 if constexpr (is_sat)
196 Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
/// max overload returning simd<T, SZ>, enabled when T satisfies
/// detail::is_esimd_scalar. Delegates to esimd::max<T> with `src0`, `Src1`
/// and the saturation tag.
/// `Src1` is declared on a line not visible here (presumably the second
/// operand widened to simd<T, SZ> — confirm).
211 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
212 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value,
simd<T, SZ>>
215 simd<T, SZ> Result = esimd::max<T>(src0, Src1, sat);
/// max overload returning simd<T, SZ>, enabled when T satisfies
/// detail::is_esimd_scalar. Delegates to esimd::max<T> with `Src0`, `src1`
/// and the saturation tag.
/// `Src0` is declared on a line not visible here (presumably the first
/// operand widened to simd<T, SZ> — confirm).
229 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
230 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value,
simd<T, SZ>>
233 simd<T, SZ> Result = esimd::max<T>(Src0, src1, sat);
/// Scalar max overload: returns T, enabled when T satisfies
/// detail::is_esimd_scalar. Evaluates esimd::max<T> on `Src0` and `Src1` and
/// keeps the result as a one-element vector.
/// `Src0`/`Src1` and the return statement are on lines not visible here.
245 template <
typename T,
class Sat = saturation_off_tag>
247 ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<T>::value, T>
251 simd<T, 1> Result = esimd::max<T>(Src0, Src1, sat);
/// Vector minimum of `src0` and `src1` with optional saturation.
/// (The signature is on lines not visible here; the name `min` is inferred
/// from the esimd::min<T> calls made by the sibling overloads.)
///
/// The intrinsic depends on the element type T:
///   - floating point -> __esimd_fmin, then __esimd_sat when saturating
///   - unsigned       -> __esimd_umin, then __esimd_uutrunc_sat when saturating
///   - remaining      -> __esimd_smin, then __esimd_sstrunc_sat when saturating
/// Saturation is applied only when Sat is saturation_on_tag (`is_sat`).
/// The return statements are on lines not visible here.
264 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
266 constexpr
bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
268 if constexpr (std::is_floating_point<T>::value) {
269 auto Result = __esimd_fmin<T, SZ>(src0.data(), src1.data());
270 if constexpr (is_sat)
271 Result = __esimd_sat<T, T, SZ>(Result);
273 }
else if constexpr (std::is_unsigned<T>::value) {
274 auto Result = __esimd_umin<T, SZ>(src0.data(), src1.data());
275 if constexpr (is_sat)
276 Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
279 auto Result = __esimd_smin<T, SZ>(src0.data(), src1.data());
280 if constexpr (is_sat)
281 Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
/// min overload returning simd<T, SZ>, enabled when T satisfies
/// detail::is_esimd_scalar. Delegates to esimd::min<T> with `src0`, `Src1`
/// and the saturation tag.
/// `Src1` is declared on a line not visible here (presumably the second
/// operand widened to simd<T, SZ> — confirm).
296 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
297 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value,
simd<T, SZ>>
300 simd<T, SZ> Result = esimd::min<T>(src0, Src1, sat);
/// min overload returning simd<T, SZ>, enabled when T satisfies
/// detail::is_esimd_scalar. Delegates to esimd::min<T> with `Src0`, `src1`
/// and the saturation tag.
/// `Src0` is declared on a line not visible here (presumably the first
/// operand widened to simd<T, SZ> — confirm).
314 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
315 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value,
simd<T, SZ>>
318 simd<T, SZ> Result = esimd::min<T>(Src0, src1, sat);
/// Scalar min overload: returns T, enabled when T satisfies
/// detail::is_esimd_scalar. Evaluates esimd::min<T> on `Src0` and `Src1` and
/// keeps the result as a one-element vector.
/// `Src0`/`Src1` and the return statement are on lines not visible here.
330 template <
typename T,
class Sat = saturation_off_tag>
332 ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<T>::value, T>
336 simd<T, 1> Result = esimd::min<T>(Src0, Src1, sat);
// Generates a pair of overloads named `name` for a one-operand intrinsic:
//   - a vector form taking simd<T, N>, which calls __esimd_<iname><T, N> on
//     the raw storage and, on the saturating path, returns
//     esimd::saturate<T>(res);
//   - a scalar form taking T, which wraps the argument in simd<T, 1> and
//     calls the vector form with N == 1.
// Both overloads are enabled only when COND holds for T.
// Parts of the macro body (the non-saturating return, closing braces, the
// scalar return) are on lines not visible here.
345 #define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname) \
347 template <class T, int N, class Sat = saturation_off_tag, \
348 class = std::enable_if_t<COND>> \
349 __ESIMD_API simd<T, N> name(simd<T, N> src, Sat sat = {}) { \
350 __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res = \
351 __esimd_##iname<T, N>(src.data()); \
352 if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
355 return esimd::saturate<T>(res); \
359 template <typename T, class Sat = saturation_off_tag, \
360 class = std::enable_if_t<COND>> \
361 __ESIMD_API T name(T src, Sat sat = {}) { \
362 simd<T, 1> src_vec = src; \
363 simd<T, 1> res = name<T, 1>(src_vec, sat); \
// Constraint on T: a generic floating-point type of at most 4 bytes.
// Intended for use as the COND argument of the intrinsic-definition macros.
367 #define __ESIMD_EMATH_COND \
368 detail::is_generic_floating_point_v<T> && (sizeof(T) <= 4)
// Constraint on T: a generic floating-point type of at least 4 bytes.
// Intended for use as the COND argument of the intrinsic-definition macros.
370 #define __ESIMD_EMATH_IEEE_COND \
371 detail::is_generic_floating_point_v<T> && (sizeof(T) >= 4)
407 #undef __ESIMD_UNARY_INTRINSIC_DEF
// Generates three overloads named `name` for a two-operand intrinsic:
//   - (simd<T, N>, simd<U, N>): converts the second operand's raw storage to
//     element type T via detail::convert_vector<T, U, N>, calls
//     __esimd_<iname><T, N>, and on the saturating path returns
//     esimd::saturate<T>(simd<T, N>(res_raw));
//   - (simd<T, N>, U): builds simd<U, N> from the scalar and forwards to the
//     first overload;
//   - (T, U): wraps both scalars in one-element vectors and forwards to the
//     first overload with N == 1.
// All overloads are enabled only when COND holds.
// Parts of the macro body (the non-saturating return, closing braces, the
// scalar return) are on lines not visible here.
409 #define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname) \
411 template <class T, int N, class U, class Sat = saturation_off_tag, \
412 class = std::enable_if_t<COND>> \
413 __ESIMD_API simd<T, N> name(simd<T, N> src0, simd<U, N> src1, \
415 using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>; \
416 RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data()); \
417 RawVecT res_raw = __esimd_##iname<T, N>(src0.data(), src1_raw_conv); \
418 if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
421 return esimd::saturate<T>(simd<T, N>(res_raw)); \
425 template <class T, int N, class U, class Sat = saturation_off_tag, \
426 class = std::enable_if_t<COND>> \
427 __ESIMD_API simd<T, N> name(simd<T, N> src0, U src1, Sat sat = {}) { \
428 return name<T, N, U>(src0, simd<U, N>(src1), sat); \
432 template <class T, class U, class Sat = saturation_off_tag, \
433 class = std::enable_if_t<COND>> \
434 __ESIMD_API T name(T src0, U src1, Sat sat = {}) { \
435 simd<T, 1> res = name<T, 1, U>(simd<T, 1>(src0), simd<U, 1>(src1), sat); \
447 #undef __ESIMD_BINARY_INTRINSIC_DEF
448 #undef __ESIMD_EMATH_COND
449 #undef __ESIMD_EMATH_IEEE_COND
// Single-precision value of ln(2). Multiplied with a base-2 logarithm
// (see the use as `esimd::log2(...) * detail::ln2`) to obtain a natural log.
459 constexpr
float ln2 = 0.69314718f;
// Single-precision value of log2(e). Multiplied with the argument before a
// base-2 exponential (see `esimd::exp2(src0 * detail::log2e, ...)`).
461 constexpr
float log2e = 1.442695f;
/// Vector natural logarithm (name inferred from the scalar overload's call to
/// esimd::log<T, 1>; the signature is on a line not visible here).
///
/// Computed as esimd::log2 of `src0` with saturation explicitly disabled,
/// multiplied by detail::ln2. Saturation is then applied once, on the final
/// value, through esimd::saturate<T> when Sat is not saturation_off_tag.
/// The non-saturating return is on a line not visible here.
469 template <
class T,
int SZ,
class Sat = saturation_off_tag>
471 using CppT = __ESIMD_DNS::__cpp_t<T>;
473 esimd::log2<T, SZ, saturation_off_tag>(src0) * detail::ln2;
475 if constexpr (std::is_same_v<Sat, saturation_off_tag>)
478 return esimd::saturate<T>(Result);
/// Scalar overload of log.
/// @param src0 the input value.
/// @param sat  saturation tag, forwarded unchanged.
/// @return lane 0 of esimd::log<T, 1> evaluated on `src0`.
481 template <
class T,
class Sat = saturation_off_tag>
482 ESIMD_NODEBUG ESIMD_INLINE
T log(
T src0, Sat sat = {}) {
483 return esimd::log<T, 1>(src0, sat)[0];
/// Vector exponential (name inferred from the scalar overload's call to
/// esimd::exp<T, 1>; the signature is on a line not visible here).
///
/// Computed as esimd::exp2 of `src0 * detail::log2e`; the saturation tag is
/// forwarded to exp2.
/// The `CppT` alias is not referenced on any line visible here.
490 template <
class T,
int SZ,
class Sat = saturation_off_tag>
492 using CppT = __ESIMD_DNS::__cpp_t<T>;
493 return esimd::exp2<T, SZ>(src0 * detail::log2e, sat);
/// Scalar overload of exp.
/// @param src0 the input value.
/// @param sat  saturation tag, forwarded unchanged.
/// @return lane 0 of esimd::exp<T, 1> evaluated on `src0`.
496 template <
class T,
class Sat = saturation_off_tag>
497 ESIMD_NODEBUG ESIMD_INLINE
T exp(
T src0, Sat sat = {}) {
498 return esimd::exp<T, 1>(src0, sat)[0];
// Generates a vector and a scalar overload named `name` that take float input
// and produce element type T:
//   - vector form: calls __esimd_<name><SZ> on the raw storage of `src0`.
//     With saturation enabled and T != float, the float result is saturated
//     first and then converted to T through convert_vector<T, float, SZ>;
//     the remaining saturating case returns saturate<T>(Result).
//   - scalar form: wraps `src0` in simd<float, 1> and calls the vector form.
// Parts of the macro body (the non-saturating return, closing braces, the
// scalar return) are on lines not visible here.
510 #define __ESIMD_INTRINSIC_DEF(name) \
518 template <typename T, int SZ, class Sat = __ESIMD_NS::saturation_off_tag> \
519 __ESIMD_API __ESIMD_NS::simd<T, SZ> name(__ESIMD_NS::simd<float, SZ> src0, \
521 __ESIMD_NS::simd<float, SZ> Result = __esimd_##name<SZ>(src0.data()); \
522 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>) \
524 else if constexpr (!std::is_same_v<float, T>) { \
525 auto RawRes = __ESIMD_NS::saturate<float>(Result).data(); \
526 return __ESIMD_DNS::convert_vector<T, float, SZ>(std::move(RawRes)); \
528 return __ESIMD_NS::saturate<T>(Result); \
532 template <typename T, class Sat = __ESIMD_NS::saturation_off_tag> \
533 __ESIMD_API T name(float src0, Sat sat = {}) { \
534 __ESIMD_NS::simd<float, 1> Src0 = src0; \
535 __ESIMD_NS::simd<T, 1> Result = name<T>(Src0, sat); \
573 #undef __ESIMD_INTRINSIC_DEF
/// Vector floor: forwards to esimd::rndd<RT, SZ>.
/// @tparam RT  element type of the result.
/// @tparam SZ  number of elements.
/// @param src0 float input vector.
/// @param sat  saturation tag, forwarded unchanged.
/// @return whatever esimd::rndd<RT, SZ>(src0, sat) yields.
580 template <
typename RT,
int SZ,
class Sat = __ESIMD_NS::saturation_off_tag>
581 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
582 floor(
const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
583 return esimd::rndd<RT, SZ>(src0, sat);
/// Scalar floor: evaluates esimd::rndd<RT, 1U> on `src0` and returns lane 0.
/// @param src0 float input value.
/// @param sat  saturation tag, forwarded unchanged.
587 template <
typename RT,
class Sat = __ESIMD_NS::saturation_off_tag>
588 ESIMD_INLINE RT
floor(
float src0, Sat sat = {}) {
589 return esimd::rndd<RT, 1U>(src0, sat)[0];
/// Vector ceil: forwards to esimd::rndu<RT, SZ>.
/// @tparam RT  element type of the result.
/// @tparam SZ  number of elements.
/// @param src0 float input vector.
/// @param sat  saturation tag, forwarded unchanged.
/// @return whatever esimd::rndu<RT, SZ>(src0, sat) yields.
593 template <
typename RT,
int SZ,
class Sat = __ESIMD_NS::saturation_off_tag>
594 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
595 ceil(
const __ESIMD_NS::simd<float, SZ> src0, Sat sat = {}) {
596 return esimd::rndu<RT, SZ>(src0, sat);
/// Scalar ceil: evaluates esimd::rndu<RT, 1U> on `src0` and returns lane 0.
/// @param src0 float input value.
/// @param sat  saturation tag, forwarded unchanged.
600 template <
typename RT,
class Sat = __ESIMD_NS::saturation_off_tag>
601 ESIMD_INLINE RT
ceil(
float src0, Sat sat = {}) {
// Extract the single lane explicitly, as the scalar floor and trunc overloads
// do, instead of relying on an implicit one-element-vector-to-scalar
// conversion.
602 return esimd::rndu<RT, 1U>(src0, sat)[0];
/// Vector trunc: forwards to esimd::rndz<RT, SZ>.
/// @tparam RT  element type of the result.
/// @tparam SZ  number of elements.
/// @param src0 float input vector (taken by const reference).
/// @param sat  saturation tag, forwarded unchanged.
/// @return whatever esimd::rndz<RT, SZ>(src0, sat) yields.
613 template <
typename RT,
int SZ,
class Sat = __ESIMD_NS::saturation_off_tag>
614 __ESIMD_API __ESIMD_NS::simd<RT, SZ>
615 trunc(
const __ESIMD_NS::simd<float, SZ> &src0, Sat sat = {}) {
616 return esimd::rndz<RT, SZ>(src0, sat);
/// Scalar trunc: evaluates esimd::rndz<RT, 1U> on `src0` and returns lane 0.
/// @param src0 float input value.
/// @param sat  saturation tag, forwarded unchanged.
626 template <
typename RT,
class Sat = __ESIMD_NS::saturation_off_tag>
627 __ESIMD_API RT
trunc(
float src0, Sat sat = {}) {
628 return esimd::rndz<RT, 1U>(src0, sat)[0];
648 return __esimd_pack_mask<N>(src0.data());
662 return __esimd_unpack_mask<N>(src0);
670 simd_mask<(N < 8 ? 8 : N < 16 ? 16 : 32)> src_0 = 0;
671 src_0.template select<N, 1>() = src0.template bit_cast_view<ushort>();
/// Packs a per-lane comparison result `cmp` through __esimd_pack_mask.
/// The function name, most of the constraint, and the definitions of `cmp`
/// and `res` are on lines not visible here.
///
/// The visible part of the constraint requires 0 < N <= 32.
/// When N is exactly 8, 16 or 32 the mask is packed directly. In the remaining
/// case the data is first written into the leading N lanes of a wider buffer
/// `res` of N1 lanes (N1 = 8, 16 or 32, the first of those that is >= N), and
/// that buffer is packed instead.
681 template <
typename T,
int N>
683 std::is_same_v<T, uint>)&&(N > 0 && N <= 32),
687 if constexpr (N == 8 || N == 16 || N == 32) {
688 return __esimd_pack_mask<N>(cmp.data());
690 constexpr
int N1 = (N <= 8 ? 8 : N <= 16 ? 16 : 32);
692 res.template select<N, 1>() = cmp.data();
693 return __esimd_pack_mask<N1>(res.data());
/// Vector wrapper over the __esimd_cbit<T, N> intrinsic, applied to the raw
/// storage of `src`. The signature and constraint are on lines not visible
/// here (NOTE(review): presumably `cbit` — confirm).
701 template <
typename T,
int N>
705 return __esimd_cbit<T, N>(src.data());
/// Scalar overload returning uint32_t, enabled for integral T of at most
/// 4 bytes. Name and body are on lines not visible here.
710 template <
typename T>
712 std::enable_if_t<std::is_integral<T>::value && (
sizeof(
T) <= 4), uint32_t>
/// simd_view overload. The visible part of its constraint requires the view's
/// element type to be at most 4 bytes and the view to have length 1.
/// Name, return type and body are on lines not visible here.
723 template <
typename BaseTy,
typename RegionTy>
726 typename simd_view<BaseTy, RegionTy>::element_type>::value &&
727 (
sizeof(
typename simd_view<BaseTy, RegionTy>::element_type) <= 4) &&
728 (simd_view<BaseTy, RegionTy>::length == 1),
/// Vector wrapper over the __esimd_fbl<T, N> intrinsic, applied to the raw
/// storage of `src`. Enabled for integral T of exactly 4 bytes; returns
/// simd<T, N>. The function name is on a line not visible here
/// (NOTE(review): presumably `fbl` — confirm).
744 template <
typename T,
int N>
746 std::enable_if_t<std::is_integral<T>::value && (
sizeof(
T) == 4),
simd<T, N>>
748 return __esimd_fbl<T, N>(src.data());
/// Scalar overload returning T, enabled for integral T of exactly 4 bytes.
/// Name and body are on lines not visible here.
753 template <
typename T>
754 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (
sizeof(
T) == 4),
T>
/// simd_view overload. The visible part of its constraint requires the view's
/// element type to be exactly 4 bytes and the view to have length 1.
/// Name, return type and body are on lines not visible here.
766 template <
typename BaseTy,
typename RegionTy>
769 typename simd_view<BaseTy, RegionTy>::element_type>::value &&
770 (
sizeof(
typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
771 (simd_view<BaseTy, RegionTy>::length == 1),
/// Vector overload for signed integral T of exactly 4 bytes: wraps the
/// __esimd_sfbh<T, N> intrinsic applied to the raw storage of `src`.
/// The function name is on a line not visible here
/// (NOTE(review): presumably `fbh` — confirm).
787 template <
typename T,
int N>
788 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
789 std::is_signed<T>::value && (
sizeof(T) == 4),
792 return __esimd_sfbh<T, N>(src.data());
/// Vector overload for non-signed integral T of exactly 4 bytes: wraps the
/// __esimd_ufbh<T, N> intrinsic applied to the raw storage of `src`.
/// The function name is on a line not visible here
/// (NOTE(review): presumably `fbh` — confirm).
801 template <
typename T,
int N>
802 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
803 !std::is_signed<T>::value && (
sizeof(T) == 4),
806 return __esimd_ufbh<T, N>(src.data());
/// Scalar overload returning T, enabled for integral T of exactly 4 bytes.
/// Name and body are on lines not visible here.
811 template <
typename T>
812 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (
sizeof(
T) == 4),
T>
/// simd_view overload. The visible part of its constraint requires the view's
/// element type to be exactly 4 bytes and the view to have length 1.
/// Name, return type and body are on lines not visible here.
824 template <
typename BaseTy,
typename RegionTy>
827 typename simd_view<BaseTy, RegionTy>::element_type>::value &&
828 (
sizeof(
typename simd_view<BaseTy, RegionTy>::element_type) == 4) &&
829 (simd_view<BaseTy, RegionTy>::length == 1),
/// dp4a-family operation on three operands (`Src0`, `Src1`, `Src2`).
/// The function name, signature and operand setup are on lines not visible
/// here. All four element types T1..T4 must satisfy detail::is_dword_type.
///
/// Device build (__SYCL_DEVICE_ONLY__ defined): one of eight intrinsics is
/// chosen at compile time. The two-letter prefix follows the unsignedness of
/// T1 then T2 (uu/us/su/ss), and the `_sat` suffix is used when Sat is not
/// saturation_off_tag.
///
/// Host build: the generic __esimd_dp4a intrinsic is evaluated into `tmp`,
/// then either saturated to T1 or converted to T1.
/// NOTE(review): the host path tests Sat with a plain `if`, whereas the
/// device path uses `if constexpr`; both branches are therefore instantiated.
855 template <
typename T1,
typename T2,
typename T3,
typename T4,
int N,
856 class Sat = saturation_off_tag>
858 detail::is_dword_type<T1>::value && detail::is_dword_type<T2>::value &&
859 detail::is_dword_type<T3>::value && detail::is_dword_type<T4>::value,
867 #if defined(__SYCL_DEVICE_ONLY__)
868 if constexpr (std::is_same_v<Sat, saturation_off_tag>) {
869 if constexpr (std::is_unsigned<T1>::value) {
870 if constexpr (std::is_unsigned<T2>::value) {
871 Result = __esimd_uudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
874 Result = __esimd_usdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
878 if constexpr (std::is_unsigned<T2>::value) {
879 Result = __esimd_sudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
882 Result = __esimd_ssdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
887 if constexpr (std::is_unsigned<T1>::value) {
888 if constexpr (std::is_unsigned<T2>::value) {
889 Result = __esimd_uudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
892 Result = __esimd_usdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
896 if constexpr (std::is_unsigned<T2>::value) {
897 Result = __esimd_sudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
900 Result = __esimd_ssdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
907 __esimd_dp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(), Src2.data());
909 if (std::is_same_v<Sat, saturation_on_tag>)
910 Result = esimd::saturate<T1>(tmp);
912 Result = convert<T1>(tmp);
913 #endif // __SYCL_DEVICE_ONLY__
921 template <
typename... T>
928 template <
typename... T>
/// Element-wise maximum of `v1` and `v2`, selecting the intrinsic by T1:
/// floating point -> __esimd_fmax, unsigned -> __esimd_umax,
/// remaining -> __esimd_smax.
/// The enclosing declaration (which introduces T1, SZ, v1 and v2) is on lines
/// not visible here.
/// NOTE(review): presumably the call operator of esimd_apply_reduced_max — confirm.
935 template <
typename... T>
937 if constexpr (std::is_floating_point<T1>::value) {
938 return __esimd_fmax<T1, SZ>(v1.data(), v2.data());
939 }
else if constexpr (std::is_unsigned<T1>::value) {
940 return __esimd_umax<T1, SZ>(v1.data(), v2.data());
942 return __esimd_smax<T1, SZ>(v1.data(), v2.data());
/// Element-wise minimum of `v1` and `v2`, selecting the intrinsic by T1:
/// floating point -> __esimd_fmin, unsigned -> __esimd_umin,
/// remaining -> __esimd_smin.
/// The enclosing declaration (which introduces T1, SZ, v1 and v2) is on lines
/// not visible here.
/// NOTE(review): presumably the call operator of esimd_apply_reduced_min — confirm.
948 template <
typename... T>
950 if constexpr (std::is_floating_point<T1>::value) {
951 return __esimd_fmin<T1, SZ>(v1.data(), v2.data());
952 }
else if constexpr (std::is_unsigned<T1>::value) {
953 return __esimd_umin<T1, SZ>(v1.data(), v2.data());
955 return __esimd_smin<T1, SZ>(v1.data(), v2.data());
/// Reduces a single vector `v` of SZ elements with the binary functor OpType
/// by repeated halving.
///
/// @tparam T0     element type of the intermediate and final results.
/// @tparam T1     element type of the input vector.
/// @tparam SZ     number of input elements.
/// @tparam OpType functor template instantiated as OpType<T0, T1, N>.
///
/// The SZ == 1 case terminates the recursion (its body is on lines not
/// visible here). In the remaining case OpType combines the lower N = SZ / 2
/// lanes with the upper N lanes, and the N-lane result — now of element type
/// T0 — is reduced recursively.
960 template <
typename T0,
typename T1,
int SZ,
961 template <
typename RT,
typename T,
int N>
class OpType>
963 if constexpr (SZ == 1) {
// Diagnostic text; spelling corrected ("Invaid" -> "Invalid").
967 "Invalid input for reduce_single - the vector size must "
969 constexpr
int N = SZ / 2;
970 simd<T0, N> tmp = OpType<T0, T1, N>()(v.template select<N, 1>(0),
971 v.template select<N, 1>(N));
972 return reduce_single<T0, T0, N, OpType>(tmp);
/// Reduces two vectors `v1` (N1 lanes) and `v2` (N2 lanes) with the binary
/// functor OpType.
///
/// @tparam T0     element type of the intermediate and final results.
/// @tparam T1     element type of the inputs.
/// @tparam OpType functor template instantiated as OpType<T0, T1, N>.
///
/// Three compile-time cases:
///   - N1 == N2: the combined value `tmp` (computed on a line not visible
///     here) is handed to reduce_single.
///   - N1 <  N2: `v1` is combined with the first N1 lanes of `v2`; the
///     remaining N2 - N1 lanes of `v2` are converted to T0 and the pair is
///     reduced recursively.
///   - remaining case: `v1` is folded in half (N = N1 / 2), `v2` is converted
///     to T0, and the pair is reduced recursively. The diagnostic below
///     indicates that N1 is expected to be a power of two on this path.
/// The `NT` alias used for `tmp2` is declared on lines not visible here.
976 template <
typename T0,
typename T1,
int N1,
int N2,
977 template <
typename RT,
typename T,
int N>
class OpType>
979 if constexpr (N1 == N2) {
981 return reduce_single<T0, T0, N1, OpType>(tmp);
982 }
else if constexpr (N1 < N2) {
983 simd<T0, N1> tmp1 = OpType<T0, T1, N1>()(v1, v2.template select<N1, 1>(0));
984 constexpr
int N = N2 - N1;
986 NT tmp2 = convert<T0>(v2.template select<N, 1>(N1).read());
987 return reduce_pair<T0, T0, N1, N, OpType>(tmp1, tmp2);
// Diagnostic text; spelling corrected ("Invaid" -> "Invalid").
990 "Invalid input for reduce_pair - N1 must be power of two.");
991 constexpr
int N = N1 / 2;
992 simd<T0, N> tmp = OpType<T0, T1, N>()(v1.template select<N, 1>(0),
993 v1.template select<N, 1>(N));
995 NT tmp2 = convert<T0>(v2);
996 return reduce_pair<T0, T0, N, N2, OpType>(tmp, tmp2);
/// Entry point of the reduction: reduces `v` (SZ lanes) with OpType.
/// (The function name is inferred from the `reduce<...>` calls made by the
/// callers below; the signature is on a line not visible here.)
///
/// One path hands the whole vector to reduce_single (its guarding condition
/// is on a line not visible here). The other path splits the input into
/// `v1` with N1 = 1u << detail::log2<SZ>() lanes and `v2` with the remaining
/// N2 = SZ - N1 lanes, then calls reduce_pair.
/// NOTE(review): N1 is presumably the largest power of two not exceeding SZ;
/// this depends on detail::log2, which is not visible here — confirm.
1000 template <
typename T0,
typename T1,
int SZ,
1001 template <
typename RT,
typename T,
int N>
class OpType>
1005 return reduce_single<T0, T1, SZ, OpType>(v);
1007 constexpr
unsigned N1 = 1u << detail::log2<SZ>();
1008 constexpr
unsigned N2 = SZ - N1;
1012 return reduce_pair<T0, T1, N1, N2, OpType>(v1, v2);
/// Reduces `v` with esimd_apply_sum and stores the result as T0.
/// The reduction runs in element type RT, taken from
/// detail::computation_type_t<simd<T1, SZ>>, and the result is converted to
/// T0 on assignment to `retv`.
/// Signature and return statement are on lines not visible here
/// (NOTE(review): presumably `sum` — confirm).
1016 template <
typename T0,
typename T1,
int SZ>
1018 using TT = detail::computation_type_t<simd<T1, SZ>>;
1019 using RT =
typename TT::element_type;
1020 T0 retv = reduce<RT, T1, SZ, esimd_apply_sum>(v);
/// Reduces `v` with esimd_apply_prod and stores the result as T0.
/// The reduction runs in element type RT, taken from
/// detail::computation_type_t<simd<T1, SZ>>, and the result is converted to
/// T0 on assignment to `retv`.
/// Signature and return statement are on lines not visible here
/// (NOTE(review): presumably `prod` — confirm).
1024 template <
typename T0,
typename T1,
int SZ>
1026 using TT = detail::computation_type_t<simd<T1, SZ>>;
1027 using RT =
typename TT::element_type;
1028 T0 retv = reduce<RT, T1, SZ, esimd_apply_prod>(v);
/// Maximum over all lanes of `v`: runs detail::reduce with
/// detail::esimd_apply_reduced_max, keeping T1 as the working element type,
/// and stores the result as T0.
/// Function name, signature and return statement are on lines not visible here.
1041 template <
typename T0,
typename T1,
int SZ>
1043 T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_max>(v);
/// Minimum over all lanes of `v`: runs detail::reduce with
/// detail::esimd_apply_reduced_min, keeping T1 as the working element type,
/// and stores the result as T0.
/// Function name, signature and return statement are on lines not visible here.
1054 template <
typename T0,
typename T1,
int SZ>
1056 T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_min>(v);
1074 template <
typename T0,
typename T1,
int SZ,
typename BinaryOperation>
1076 if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1077 std::plus<>>::value) {
1078 T0 retv = detail::sum<T0>(v);
1080 }
else if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1081 std::multiplies<>>::value) {
1082 T0 retv = detail::prod<T0>(v);