25 inline namespace _V1 {
26 namespace ext::intel::esimd {
/// saturate(): converts the elements of a simd<T1, SZ> vector to type T0 with
/// saturation. Enabled only when T0 is not a generic floating-point type or
/// when T1 and T0 are the same type.
// NOTE: this listing is an extract; embedded source lines 71-72, 78, 83, 85,
// 88 and 90+ (rest of the signature, `else` keywords, closing braces) are
// absent.
68 template <
typename T0,
typename T1,
int SZ>
69 __ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v<T0> ||
70 std::is_same_v<T1, T0>,
// Floating-point destination: the enable_if above then requires T1 == T0.
73 if constexpr (detail::is_generic_floating_point_v<T0>)
74 return __esimd_sat<T0, T1, SZ>(src.
data());
// Floating-point source, integer destination: pick the unsigned or signed
// float-to-int saturating conversion from the signedness of T0.
75 else if constexpr (detail::is_generic_floating_point_v<T1>) {
76 if constexpr (std::is_unsigned_v<T0>)
77 return __esimd_fptoui_sat<T0, T1, SZ>(src.
data());
79 return __esimd_fptosi_sat<T0, T1, SZ>(src.
data());
80 }
else if constexpr (std::is_unsigned_v<T0>) {
// Integer to unsigned integer: intrinsic chosen by the signedness of T1.
81 if constexpr (std::is_unsigned_v<T1>)
82 return __esimd_uutrunc_sat<T0, T1, SZ>(src.
data());
84 return __esimd_ustrunc_sat<T0, T1, SZ>(src.
data());
// Remaining case (presumably the `else` of the unsigned-T0 test above, i.e.
// a signed integer destination): intrinsic chosen by the signedness of T1.
86 if constexpr (std::is_signed_v<T1>)
87 return __esimd_sstrunc_sat<T0, T1, SZ>(src.
data());
89 return __esimd_sutrunc_sat<T0, T1, SZ>(src.
data());
97 template <
typename TRes,
typename TArg,
int SZ>
101 return convert<TRes>(Result);
/// Scalar overload of __esimd_abs_common_internal: enabled only when both TRes
/// and TArg are ESIMD scalar types.
// NOTE: embedded source lines 108 and 110 are absent from this listing; line
// 110 presumably declares the 1-element vector `Src0` built from `src0`.
104 template <
typename TRes,
typename TArg>
105 ESIMD_NODEBUG ESIMD_INLINE
106 std::enable_if_t<detail::is_esimd_scalar<TRes>::value &&
107 detail::is_esimd_scalar<TArg>::value,
109 __esimd_abs_common_internal(TArg
src0) {
// Delegate to the vector implementation on a 1-element vector.
111 simd<TArg, 1> Result = __esimd_abs_common_internal<TArg>(Src0);
// Convert to the result type and return the single element.
112 return convert<TRes>(Result)[0];
/// abs(), vector version with distinct result and argument element types:
/// enabled only when TRes and TArg differ after removing const.
/// Forwards the raw vector data to detail::__esimd_abs_common_internal.
// NOTE: embedded source lines 126-127 (return type and signature) are absent
// from this listing.
123 template <
typename TRes,
typename TArg,
int SZ>
124 __ESIMD_API std::enable_if_t<
125 !std::is_same<std::remove_const_t<TRes>, std::remove_const_t<TArg>>::value,
128 return detail::__esimd_abs_common_internal<TRes, TArg, SZ>(
src0.data());
/// Scalar overload (presumably of abs(); the line naming the function is
/// absent from this listing) with distinct result and argument types.
/// Enabled only when TRes and TArg differ after removing const and both are
/// ESIMD scalar types; returns std::remove_const_t<TRes>.
136 template <
typename TRes,
typename TArg>
137 __ESIMD_API std::enable_if_t<!std::is_same<std::remove_const_t<TRes>,
138 std::remove_const_t<TArg>>::value &&
139 detail::is_esimd_scalar<TRes>::value &&
140 detail::is_esimd_scalar<TArg>::value,
141 std::remove_const_t<TRes>>
// Forward to the scalar internal helper.
143 return detail::__esimd_abs_common_internal<TRes, TArg>(
src0);
154 return detail::__esimd_abs_common_internal<T1, T1, SZ>(
src0.data());
/// Scalar overload (presumably of abs(); the line naming the function is
/// absent from this listing) where the result type equals the argument type.
/// Enabled only when T1 is an ESIMD scalar type; returns
/// std::remove_const_t<T1>.
163 template <
typename T1>
164 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T1>::value,
165 std::remove_const_t<T1>>
// Same type is used for both the result and the argument of the helper.
167 return detail::__esimd_abs_common_internal<T1, T1>(
src0);
/// max(), vector version: selects component-wise the maximum of the two
/// vectors src0 and src1, optionally saturating the result.
// NOTE: embedded source lines 180, 182, 187, 192-193 and 197+ (signature,
// returns, `else`, closing braces) are absent from this listing.
179 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
// Saturation is requested by passing saturation_on_tag as Sat.
181 constexpr
bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
// Floating-point elements: fmax, then __esimd_sat when saturating.
183 if constexpr (std::is_floating_point<T>::value) {
184 auto Result = __esimd_fmax<T, SZ>(
src0.data(),
src1.data());
185 if constexpr (is_sat)
186 Result = __esimd_sat<T, T, SZ>(Result);
188 }
else if constexpr (std::is_unsigned<T>::value) {
// Unsigned elements: umax, then unsigned-to-unsigned saturation.
189 auto Result = __esimd_umax<T, SZ>(
src0.data(),
src1.data());
190 if constexpr (is_sat)
191 Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
// Remaining case (presumably signed integers): smax, then signed-to-signed
// saturation.
194 auto Result = __esimd_smax<T, SZ>(
src0.data(),
src1.data());
195 if constexpr (is_sat)
196 Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
211 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
212 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value,
simd<T, SZ>>(
229 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
230 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value,
simd<T, SZ>>(
245 template <
typename T,
class Sat = saturation_off_tag>
246 ESIMD_NODEBUG ESIMD_INLINE
247 std::enable_if_t<detail::is_esimd_scalar<T>::value, T>(
max)(T
src0, T
src1,
/// min(), vector version (presumably; the signature line is absent from this
/// listing): component-wise minimum of src0 and src1 via the
/// __esimd_fmin/umin/smin intrinsics, optionally saturating the result.
// NOTE: embedded source lines 265, 267, 272, 277-278 and 282+ are absent
// from this listing.
264 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
// Saturation is requested by passing saturation_on_tag as Sat.
266 constexpr
bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
// Floating-point elements: fmin, then __esimd_sat when saturating.
268 if constexpr (std::is_floating_point<T>::value) {
269 auto Result = __esimd_fmin<T, SZ>(
src0.data(),
src1.data());
270 if constexpr (is_sat)
271 Result = __esimd_sat<T, T, SZ>(Result);
273 }
else if constexpr (std::is_unsigned<T>::value) {
// Unsigned elements: umin, then unsigned-to-unsigned saturation.
274 auto Result = __esimd_umin<T, SZ>(
src0.data(),
src1.data());
275 if constexpr (is_sat)
276 Result = __esimd_uutrunc_sat<T, T, SZ>(Result);
// Remaining case (presumably signed integers): smin, then signed-to-signed
// saturation.
279 auto Result = __esimd_smin<T, SZ>(
src0.data(),
src1.data());
280 if constexpr (is_sat)
281 Result = __esimd_sstrunc_sat<T, T, SZ>(Result);
296 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
297 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value,
simd<T, SZ>>(
314 template <
typename T,
int SZ,
class Sat = saturation_off_tag>
315 __ESIMD_API std::enable_if_t<detail::is_esimd_scalar<T>::value,
simd<T, SZ>>(
330 template <
typename T,
class Sat = saturation_off_tag>
331 ESIMD_NODEBUG ESIMD_INLINE
332 std::enable_if_t<detail::is_esimd_scalar<T>::value, T>(
min)(T
src0, T
src1,
// __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname): generates two overloads of
// `name`, both enabled only when COND holds.
//  - Vector: calls the intrinsic __esimd_<iname><T, N> on src.data() and, when
//    Sat is not saturation_off_tag, passes the result through
//    esimd::saturate<T>.
//  - Scalar: wraps src in a 1-element simd and calls the vector overload.
// NOTE: embedded source lines 346, 353-354, 356-358 and 364+ are absent from
// this listing.
345 #define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname) \
347 template <class T, int N, class Sat = saturation_off_tag, \
348 class = std::enable_if_t<COND>> \
349 __ESIMD_API simd<T, N> name(simd<T, N> src, Sat sat = {}) { \
350 __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N> res = \
351 __esimd_##iname<T, N>(src.data()); \
352 if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
355 return esimd::saturate<T>(simd<T, N>(res)); \
359 template <typename T, class Sat = saturation_off_tag, \
360 class = std::enable_if_t<COND>> \
361 __ESIMD_API T name(T src, Sat sat = {}) { \
362 simd<T, 1> src_vec = src; \
363 simd<T, 1> res = name<T, 1>(src_vec, sat); \
// Enable condition for the extended-math functions: T is a generic
// floating-point type of at most 4 bytes.
367 #define __ESIMD_EMATH_COND \
368 detail::is_generic_floating_point_v<T> && (sizeof(T) <= 4)
// Enable condition for the IEEE-compliant variants: T is a generic
// floating-point type of at least 4 bytes.
370 #define __ESIMD_EMATH_IEEE_COND \
371 detail::is_generic_floating_point_v<T> && (sizeof(T) >= 4)
407 #undef __ESIMD_UNARY_INTRINSIC_DEF
// __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname): generates three overloads
// of `name`, all enabled only when COND holds.
//  - (simd<T, N>, simd<U, N>): converts src1 to element type T, calls the
//    intrinsic __esimd_<iname><T, N>, and, when Sat is not
//    saturation_off_tag, passes the result through esimd::saturate<T>.
//  - (simd<T, N>, U): builds a simd<U, N> from the scalar src1 and forwards
//    to the first overload.
//  - (T, U): wraps both scalars in 1-element vectors and forwards to the
//    first overload.
// NOTE: embedded source lines 410, 414, 419-420, 422-424, 429-431 and 436+
// are absent from this listing.
409 #define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname) \
411 template <class T, int N, class U, class Sat = saturation_off_tag, \
412 class = std::enable_if_t<COND>> \
413 __ESIMD_API simd<T, N> name(simd<T, N> src0, simd<U, N> src1, \
415 using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t<T>, N>; \
416 RawVecT src1_raw_conv = detail::convert_vector<T, U, N>(src1.data()); \
417 RawVecT res_raw = __esimd_##iname<T, N>(src0.data(), src1_raw_conv); \
418 if constexpr (std::is_same_v<Sat, saturation_off_tag>) \
421 return esimd::saturate<T>(simd<T, N>(res_raw)); \
425 template <class T, int N, class U, class Sat = saturation_off_tag, \
426 class = std::enable_if_t<COND>> \
427 __ESIMD_API simd<T, N> name(simd<T, N> src0, U src1, Sat sat = {}) { \
428 return name<T, N, U>(src0, simd<U, N>(src1), sat); \
432 template <class T, class U, class Sat = saturation_off_tag, \
433 class = std::enable_if_t<COND>> \
434 __ESIMD_API T name(T src0, U src1, Sat sat = {}) { \
435 simd<T, 1> res = name<T, 1, U>(simd<T, 1>(src0), simd<U, 1>(src1), sat); \
447 #undef __ESIMD_BINARY_INTRINSIC_DEF
448 #undef __ESIMD_EMATH_COND
449 #undef __ESIMD_EMATH_IEEE_COND
// Natural logarithm of 2; log() multiplies log2(x) by this constant.
459 constexpr
float ln2 = 0.69314718f;
// Base-2 logarithm of e; exp() multiplies its argument by this constant
// before calling exp2().
461 constexpr
float log2e = 1.442695f;
/// log(), vector version: computes the natural logarithm of src0 as
/// log2(src0) * ln2, optionally saturating the result.
// NOTE: embedded source lines 470, 472, 474 and 476-477 (signature, the
// declaration of `Result`, the non-saturating return) are absent from this
// listing.
469 template <
class T,
int SZ,
class Sat = saturation_off_tag>
471 using CppT = __ESIMD_DNS::__cpp_t<T>;
// log2 is always called without saturation; saturation is applied below.
473 esimd::log2<T, SZ, saturation_off_tag>(
src0) * detail::ln2;
475 if constexpr (std::is_same_v<Sat, saturation_off_tag>)
478 return esimd::saturate<T>(Result);
/// Scalar overload (presumably of log(); the signature line is absent from
/// this listing): calls the vector log with size 1 and returns element 0.
481 template <
class T,
class Sat = saturation_off_tag>
483 return esimd::log<T, 1>(
src0,
sat)[0];
/// exp(), vector version: computes e raised to the power of src0 as
/// exp2(src0 * log2e); the saturation tag is forwarded to exp2.
// NOTE: embedded source line 491 (signature) is absent from this listing.
490 template <
class T,
int SZ,
class Sat = saturation_off_tag>
492 using CppT = __ESIMD_DNS::__cpp_t<T>;
493 return esimd::exp2<T, SZ>(
src0 * detail::log2e,
sat);
/// Scalar overload (presumably of exp(); the signature line is absent from
/// this listing): calls the vector exp with size 1 and returns element 0.
496 template <
class T,
class Sat = saturation_off_tag>
498 return esimd::exp<T, 1>(
src0,
sat)[0];
// __ESIMD_INTRINSIC_DEF(name): generates the float rounding functions
// (the reference section of this listing shows rndd, rndu, rnde and rndz).
//  - Vector: calls __esimd_<name><SZ> on the float input. With saturation
//    requested and T != float, the result is saturated as float and then
//    converted element-wise to T; with T == float it goes through
//    saturate<T> directly.
//  - Scalar: wraps the float in a 1-element simd and calls the vector form.
// NOTE: embedded source lines 511-517, 520, 523, 527, 529-531 and 536+ are
// absent from this listing.
510 #define __ESIMD_INTRINSIC_DEF(name) \
518 template <typename T, int SZ, class Sat = __ESIMD_NS::saturation_off_tag> \
519 __ESIMD_API __ESIMD_NS::simd<T, SZ> name(__ESIMD_NS::simd<float, SZ> src0, \
521 __ESIMD_NS::simd<float, SZ> Result = __esimd_##name<SZ>(src0.data()); \
522 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>) \
524 else if constexpr (!std::is_same_v<float, T>) { \
525 auto RawRes = __ESIMD_NS::saturate<float>(Result).data(); \
526 return __ESIMD_DNS::convert_vector<T, float, SZ>(std::move(RawRes)); \
528 return __ESIMD_NS::saturate<T>(Result); \
532 template <typename T, class Sat = __ESIMD_NS::saturation_off_tag> \
533 __ESIMD_API T name(float src0, Sat sat = {}) { \
534 __ESIMD_NS::simd<float, 1> Src0 = src0; \
535 __ESIMD_NS::simd<T, 1> Result = name<T>(Src0, sat); \
573 #undef __ESIMD_INTRINSIC_DEF
/// floor(), vector version: alias of rndd (round-down); forwards src0 and
/// the saturation tag unchanged.
// NOTE: embedded source line 582 (function name and parameters) is absent
// from this listing.
580 template <
typename RT,
int SZ,
class Sat = __ESIMD_NS::saturation_off_tag>
581 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
583 return esimd::rndd<RT, SZ>(
src0,
sat);
/// Scalar overload (presumably of floor(); the signature line is absent from
/// this listing): calls rndd with vector size 1 and returns element 0.
587 template <
typename RT,
class Sat = __ESIMD_NS::saturation_off_tag>
589 return esimd::rndd<RT, 1U>(
src0,
sat)[0];
/// ceil(), vector version: alias of rndu (round-up); forwards src0 and the
/// saturation tag unchanged.
// NOTE: embedded source line 595 (function name and parameters) is absent
// from this listing.
593 template <
typename RT,
int SZ,
class Sat = __ESIMD_NS::saturation_off_tag>
594 ESIMD_INLINE __ESIMD_NS::simd<RT, SZ>
596 return esimd::rndu<RT, SZ>(
src0,
sat);
/// Scalar overload (presumably of ceil(); the signature line is absent from
/// this listing): calls rndu with vector size 1.
// NOTE(review): unlike the scalar floor/trunc fragments in this listing, the
// result is returned without `[0]`, so it relies on an implicit conversion
// from the 1-element vector to the return type — confirm against the full
// source whether `[0]` should be added for consistency.
600 template <
typename RT,
class Sat = __ESIMD_NS::saturation_off_tag>
602 return esimd::rndu<RT, 1U>(
src0,
sat);
/// trunc(), vector version: rounds to an integral value using the
/// round-to-zero mode by forwarding src0 and the saturation tag to rndz.
// NOTE: embedded source line 615 (function name and parameters) is absent
// from this listing.
613 template <
typename RT,
int SZ,
class Sat = __ESIMD_NS::saturation_off_tag>
614 __ESIMD_API __ESIMD_NS::simd<RT, SZ>
616 return esimd::rndz<RT, SZ>(
src0,
sat);
/// Scalar overload (presumably of trunc(); the signature line is absent from
/// this listing): calls rndz with vector size 1 and returns element 0.
626 template <
typename RT,
class Sat = __ESIMD_NS::saturation_off_tag>
628 return esimd::rndz<RT, 1U>(
src0,
sat)[0];
/// pack_mask(): packs a simd_mask<N> into a single unsigned 32-bit integer.
/// This overload is enabled only for N == 8, 16 or 32 and maps directly to
/// the __esimd_pack_mask intrinsic.
// NOTE: embedded source lines 644 and 647 (template header, function name)
// are absent from this listing.
645 ESIMD_NODEBUG ESIMD_INLINE
646 std::enable_if_t<(N == 8 || N == 16 || N == 32),
uint>
648 return __esimd_pack_mask<N>(
src0.data());
/// unpack_mask(): unpacks an unsigned 32-bit integer into a simd_mask<N> via
/// the __esimd_unpack_mask intrinsic.
// NOTE: embedded source lines 658 and 660-661 (template header, return type,
// function name) are absent from this listing.
659 ESIMD_NODEBUG ESIMD_INLINE
662 return __esimd_unpack_mask<N>(
src0);
/// Overload (presumably of pack_mask(); the name line is absent from this
/// listing) for mask sizes below 32 other than 8 and 16.
// NOTE: embedded source lines 667, 669 and 672+ are absent from this listing.
668 __ESIMD_API std::enable_if_t<(N != 8 && N != 16 && N < 32), uint>
// Zero-initialised mask of the next supported width (8, 16 or 32).
670 simd_mask<(N < 8 ? 8 : N < 16 ? 16 : 32)> src_0 = 0;
// Copy the N input elements into the first N positions of the wider mask.
671 src_0.template select<N, 1>() =
src0.template bit_cast_view<ushort>();
/// ballot(): compares the source vector elements against zero and returns a
/// bitfield combining the comparison results. Enabled for T == ushort or
/// uint and 0 < N <= 32.
// NOTE: embedded source lines 684-686, 689, 691 and 694+ (return type,
// signature, the declarations of `cmp` and `res`, `else`) are absent from
// this listing.
681 template <
typename T,
int N>
682 __ESIMD_API std::enable_if_t<(std::is_same_v<T, ushort> ||
683 std::is_same_v<T, uint>)&&(N > 0 && N <= 32),
// Widths supported by the intrinsic are packed directly.
687 if constexpr (N == 8 || N == 16 || N == 32) {
688 return __esimd_pack_mask<N>(cmp.
data());
// Otherwise round N up to the next supported width and pack from there.
690 constexpr
int N1 = (N <= 8 ? 8 : N <= 16 ? 16 : 32);
692 res.template select<N, 1>() = cmp.
data();
693 return __esimd_pack_mask<N1>(res.
data());
/// cbit(), vector version: counts the number of bits set in each element of
/// the source operand. Enabled for integral T of at most 4 bytes.
// NOTE: embedded source lines 704-705 (return type tail and signature) are
// absent from this listing.
701 template <
typename T,
int N>
702 ESIMD_NODEBUG ESIMD_INLINE
703 std::enable_if_t<std::is_integral<T>::value && (
sizeof(T) <= 4),
706 return __esimd_cbit<T, N>(src.
data());
711 template <
typename T>
713 std::enable_if_t<std::is_integral<T>::value && (
sizeof(T) <= 4), uint32_t>
724 template <
typename BaseTy,
typename RegionTy>
725 __ESIMD_API std::enable_if_t<
/// fbl(), vector version: finds, per element, the first bit set starting
/// from the least significant bit. Enabled for integral T of exactly 4 bytes.
// NOTE: embedded source lines 746 and 748 are absent from this listing.
745 template <
typename T,
int N>
747 std::enable_if_t<std::is_integral<T>::value && (
sizeof(T) == 4),
simd<T, N>>
749 return __esimd_fbl<T, N>(src.
data());
754 template <
typename T>
755 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (
sizeof(T) == 4), T>
767 template <
typename BaseTy,
typename RegionTy>
768 __ESIMD_API std::enable_if_t<
/// fbh(), vector version for signed 4-byte integral T: finds, per element,
/// the first bit set starting from the most significant bit, using the
/// signed intrinsic __esimd_sfbh.
// NOTE: embedded source lines 791-792 (return type tail and signature) are
// absent from this listing.
788 template <
typename T,
int N>
789 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
790 std::is_signed<T>::value && (
sizeof(T) == 4),
793 return __esimd_sfbh<T, N>(src.
data());
/// Overload (presumably of fbh(); the name line is absent from this listing)
/// for non-signed 4-byte integral T, using the unsigned intrinsic
/// __esimd_ufbh.
// NOTE: embedded source lines 805-806 (return type tail and signature) are
// absent from this listing.
802 template <
typename T,
int N>
803 __ESIMD_API std::enable_if_t<std::is_integral<T>::value &&
804 !std::is_signed<T>::value && (
sizeof(T) == 4),
807 return __esimd_ufbh<T, N>(src.
data());
812 template <
typename T>
813 __ESIMD_API std::enable_if_t<std::is_integral<T>::value && (
sizeof(T) == 4), T>
825 template <
typename BaseTy,
typename RegionTy>
826 __ESIMD_API std::enable_if_t<
/// dp4a(): enabled only when T1..T4 are all dword types. On device the
/// intrinsic is selected from three compile-time properties: whether
/// saturation is requested, whether T1 is unsigned, and whether T2 is
/// unsigned.
// NOTE: many embedded source lines (signature, local declarations, argument
// tails of the intrinsic calls, `else`, closing braces, #else/#endif) are
// absent from this listing.
856 template <
typename T1,
typename T2,
typename T3,
typename T4,
int N,
857 class Sat = saturation_off_tag>
858 __ESIMD_API std::enable_if_t<
859 detail::is_dword_type<T1>::value && detail::is_dword_type<T2>::value &&
860 detail::is_dword_type<T3>::value && detail::is_dword_type<T4>::value,
// Device-only implementation; the host path is marked unsupported below.
863 #if defined(__SYCL_DEVICE_ONLY__)
// Non-saturating variants: uu / us / su / ss, chosen from the signedness of
// T1 and T2 respectively.
868 if constexpr (std::is_same_v<Sat, saturation_off_tag>) {
869 if constexpr (std::is_unsigned<T1>::value) {
870 if constexpr (std::is_unsigned<T2>::value) {
871 Result = __esimd_uudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
874 Result = __esimd_usdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
878 if constexpr (std::is_unsigned<T2>::value) {
879 Result = __esimd_sudp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
882 Result = __esimd_ssdp4a<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
// Saturating variants: same selection with the `_sat` intrinsics.
887 if constexpr (std::is_unsigned<T1>::value) {
888 if constexpr (std::is_unsigned<T2>::value) {
889 Result = __esimd_uudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
892 Result = __esimd_usdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
896 if constexpr (std::is_unsigned<T2>::value) {
897 Result = __esimd_sudp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
900 Result = __esimd_ssdp4a_sat<T1, T2, T3, T4, N>(Src0.data(), Src1.data(),
907 __ESIMD_UNSUPPORTED_ON_HOST;
914 template <
typename... T>
921 template <
typename... T>
// Body of a reduction functor's operator()(v1, v2) (presumably
// esimd_apply_reduced_max; the struct and signature lines are absent from
// this listing): element-wise maximum of v1 and v2 using the float,
// unsigned, or remaining (presumably signed) max intrinsic according to T1.
928 template <
typename... T>
930 if constexpr (std::is_floating_point<T1>::value) {
931 return __esimd_fmax<T1, SZ>(v1.
data(), v2.
data());
932 }
else if constexpr (std::is_unsigned<T1>::value) {
933 return __esimd_umax<T1, SZ>(v1.
data(), v2.
data());
935 return __esimd_smax<T1, SZ>(v1.
data(), v2.
data());
// Body of a reduction functor's operator()(v1, v2) (presumably
// esimd_apply_reduced_min; the struct and signature lines are absent from
// this listing): element-wise minimum of v1 and v2 using the float,
// unsigned, or remaining (presumably signed) min intrinsic according to T1.
941 template <
typename... T>
943 if constexpr (std::is_floating_point<T1>::value) {
944 return __esimd_fmin<T1, SZ>(v1.
data(), v2.
data());
945 }
else if constexpr (std::is_unsigned<T1>::value) {
946 return __esimd_umin<T1, SZ>(v1.
data(), v2.
data());
948 return __esimd_smin<T1, SZ>(v1.
data(), v2.
data());
/// reduce_single(): reduces one vector `v` of SZ elements with OpType by
/// repeatedly combining its lower and upper halves until one element is left.
/// T0 is the computation/result element type, T1 the input element type.
// NOTE: embedded source lines 955, 957-959, 961 and 966+ (signature, the
// SZ == 1 return, the static_assert head and message tail, closing braces)
// are absent from this listing.
953 template <
typename T0,
typename T1,
int SZ,
954 template <
typename RT,
typename T,
int N>
class OpType>
// Recursion terminates on a single-element vector.
956 if constexpr (
SZ == 1) {
// Diagnostic text of the size check ("Invaid" typo corrected to "Invalid").
960 "Invalid input for reduce_single - the vector size must "
// Combine the first N elements with the next N elements.
962 constexpr
int N =
SZ / 2;
963 simd<T0, N> tmp = OpType<T0, T1, N>()(v.template select<N, 1>(0),
964 v.template select<N, 1>(N));
// The intermediate result already has element type T0.
965 return reduce_single<T0, T0, N, OpType>(tmp);
/// reduce_pair(): reduces two vectors v1 (N1 elements) and v2 (N2 elements)
/// with OpType. T0 is the computation/result element type, T1 the input
/// element type.
// NOTE: embedded source lines 971, 973, 978, 981-982, 987 and 990+
// (signature, the N1 == N2 combine, the `NT` alias declarations, `else`, the
// static_assert head, closing braces) are absent from this listing.
969 template <
typename T0,
typename T1,
int N1,
int N2,
970 template <
typename RT,
typename T,
int N>
class OpType>
// Equal sizes: `tmp` (computed on an absent line) is reduced as one vector.
972 if constexpr (N1 == N2) {
974 return reduce_single<T0, T0, N1, OpType>(tmp);
975 }
else if constexpr (N1 < N2) {
// v2 is longer: combine v1 with the first N1 elements of v2, then recurse
// on that result and the remaining N2 - N1 elements of v2.
976 simd<T0, N1> tmp1 = OpType<T0, T1, N1>()(v1, v2.template select<N1, 1>(0));
977 constexpr
int N = N2 - N1;
979 NT tmp2 = convert<T0>(v2.template select<N, 1>(N1).
read());
980 return reduce_pair<T0, T0, N1, N, OpType>(tmp1, tmp2);
// Remaining case (presumably N1 > N2): diagnostic text of the power-of-two
// check on N1 ("Invaid" typo corrected to "Invalid").
983 "Invalid input for reduce_pair - N1 must be power of two.");
// Fold v1 onto itself (lower half with upper half), then recurse with v2
// converted to the computation type.
984 constexpr
int N = N1 / 2;
985 simd<T0, N> tmp = OpType<T0, T1, N>()(v1.template select<N, 1>(0),
986 v1.template select<N, 1>(N));
988 NT tmp2 = convert<T0>(v2);
989 return reduce_pair<T0, T0, N, N2, OpType>(tmp, tmp2);
/// reduce() (detail): entry point of the OpType reduction over a vector of
/// SZ elements. One path hands the whole vector to reduce_single; the other
/// splits it into a part of N1 = 1 << log2<SZ>() elements and a remainder of
/// N2 = SZ - N1 elements and hands both to reduce_pair.
// NOTE: embedded source lines 995-997, 999, 1002-1004 and 1006+ (signature,
// the condition selecting the path, the declarations of v1 and v2) are absent
// from this listing. Presumably the condition is a power-of-two test on SZ —
// confirm against the full source.
993 template <
typename T0,
typename T1,
int SZ,
994 template <
typename RT,
typename T,
int N>
class OpType>
998 return reduce_single<T0, T1, SZ, OpType>(v);
1000 constexpr
unsigned N1 = 1u << detail::log2<SZ>();
1001 constexpr
unsigned N2 =
SZ - N1;
1005 return reduce_pair<T0, T1, N1, N2, OpType>(v1, v2);
/// sum() (presumably; the signature line is absent from this listing):
/// reduces `v` with esimd_apply_sum. The reduction runs in the element type
/// of detail::computation_type_t<simd<T1, SZ>> and the result is then
/// converted to T0 by the initialisation of `retv`.
1009 template <
typename T0,
typename T1,
int SZ>
1011 using TT = detail::computation_type_t<simd<T1, SZ>>;
1012 using RT =
typename TT::element_type;
1013 T0 retv = reduce<RT, T1, SZ, esimd_apply_sum>(v);
/// prod() (presumably; the signature line is absent from this listing):
/// reduces `v` with esimd_apply_prod. The reduction runs in the element type
/// of detail::computation_type_t<simd<T1, SZ>> and the result is then
/// converted to T0 by the initialisation of `retv`.
1017 template <
typename T0,
typename T1,
int SZ>
1019 using TT = detail::computation_type_t<simd<T1, SZ>>;
1020 using RT =
typename TT::element_type;
1021 T0 retv = reduce<RT, T1, SZ, esimd_apply_prod>(v);
/// hmax() (presumably; the signature line is absent from this listing):
/// reduces `v` with detail::esimd_apply_reduced_max, computing in the input
/// element type T1 and converting the result to T0.
1034 template <
typename T0,
typename T1,
int SZ>
1036 T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_max>(v);
/// hmin() (presumably; the signature line is absent from this listing):
/// reduces `v` with detail::esimd_apply_reduced_min, computing in the input
/// element type T1 and converting the result to T0.
1047 template <
typename T0,
typename T1,
int SZ>
1049 T0 retv = detail::reduce<T1, T1, SZ, detail::esimd_apply_reduced_min>(v);
/// reduce(v, op): performs a reduction over the elements of the input
/// vector, selecting the implementation from the type of BinaryOperation
/// (ignoring cv/ref qualifiers): std::plus<> maps to detail::sum and
/// std::multiplies<> maps to detail::prod.
// NOTE: embedded source lines 1068, 1072 and 1076+ (signature, returns, any
// further branches) are absent from this listing.
1067 template <
typename T0,
typename T1,
int SZ,
typename BinaryOperation>
1069 if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1070 std::plus<>>::value) {
1071 T0 retv = detail::sum<T0>(v);
1073 }
else if constexpr (std::is_same<detail::remove_cvref_t<BinaryOperation>,
1074 std::multiplies<>>::value) {
1075 T0 retv = detail::prod<T0>(v);
/// Encodes the logical operations performed on the 3 input operands of bfn().
/// Each enumerator is an 8-bit truth-table column: for row index i (0..7),
/// bit i of x, y and z equals bit 0, bit 1 and bit 2 of i respectively.
/// Enumerators are combined with the ~, |, & and ^ operators on bfn_t.
1087 enum class bfn_t : uint8_t {
x = 0xAA, // 0b10101010
y = 0xCC, // 0b11001100
z = 0xF0 }; // 0b11110000
// Body of a unary bfn_t operator (presumably operator~; the signature on
// embedded line 1089 and the computation of `res` on line 1091 are absent
// from this listing): the operand is converted to uint8_t and the computed
// byte is cast back to bfn_t.
1090 uint8_t val =
static_cast<uint8_t
>(
x);
1092 return static_cast<bfn_t>(res);
// Body of the bfn_t bitwise-OR operator (signature line absent from this
// listing): both operands are converted to uint8_t, OR-ed, and the byte is
// cast back to bfn_t.
1096 uint8_t arg0 =
static_cast<uint8_t
>(
x);
1097 uint8_t arg1 =
static_cast<uint8_t
>(
y);
1098 uint8_t res = arg0 | arg1;
1099 return static_cast<bfn_t>(res);
// Body of the bfn_t bitwise-AND operator (signature line absent from this
// listing): both operands are converted to uint8_t, AND-ed, and the byte is
// cast back to bfn_t.
1103 uint8_t arg0 =
static_cast<uint8_t
>(
x);
1104 uint8_t arg1 =
static_cast<uint8_t
>(
y);
1105 uint8_t res = arg0 & arg1;
1106 return static_cast<bfn_t>(res);
// Body of the bfn_t bitwise-XOR operator (signature line absent from this
// listing): both operands are converted to uint8_t, XOR-ed, and the byte is
// cast back to bfn_t.
1110 uint8_t arg0 =
static_cast<uint8_t
>(
x);
1111 uint8_t arg1 =
static_cast<uint8_t
>(
y);
1112 uint8_t res = arg0 ^ arg1;
1113 return static_cast<bfn_t>(res);
/// bfn(): performs the binary function selected by FuncControl on three
/// vector operands of integral element type T.
/// The native intrinsic is called only for 2- and 4-byte elements; other
/// shapes are first re-expressed in terms of one of those and handled by a
/// recursive call.
// NOTE: embedded source lines 1130-1132, 1142, 1148-1149 and 1156+ (original
// comments, `else`, closing braces) are absent from this listing.
1124 template <bfn_t FuncControl,
typename T,
int N>
1125 __ESIMD_API std::enable_if_t<std::is_integral_v<T>, __ESIMD_NS::simd<T, N>>
1126 bfn(__ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
1127 __ESIMD_NS::simd<T, N> src2) {
// 8-byte elements, 1-byte elements in multiples of 4, or 2-byte elements in
// multiples of 2: view the operands as int32_t vectors and recurse.
1128 if constexpr ((
sizeof(T) == 8) || ((
sizeof(T) == 1) && (N % 4 == 0)) ||
1129 ((
sizeof(T) == 2) && (N % 2 == 0))) {
1133 auto Result = __ESIMD_NS::bfn<FuncControl>(
1134 src0.template bit_cast_view<int32_t>().read(),
1135 src1.template bit_cast_view<int32_t>().read(),
1136 src2.template bit_cast_view<int32_t>().read());
1137 return Result.template bit_cast_view<T>();
1138 }
else if constexpr (
sizeof(T) == 2 ||
sizeof(T) == 4) {
// Directly supported element sizes: pass the control byte to the intrinsic.
1139 constexpr uint8_t FC =
static_cast<uint8_t
>(FuncControl);
1140 return __esimd_bfn<FC, T, N>(
src0.data(),
src1.data(), src2.data());
1141 }
else if constexpr (N % 2 == 0) {
// Even element count not covered above: view the operands as int16_t
// vectors and recurse.
1143 auto Result = __ESIMD_NS::bfn<FuncControl>(
1144 src0.template bit_cast_view<int16_t>().read(),
1145 src1.template bit_cast_view<int16_t>().read(),
1146 src2.template bit_cast_view<int16_t>().read());
1147 return Result.template bit_cast_view<T>();
// Remaining case (presumably the final `else`, i.e. odd N): copy the
// operands into vectors of N + 1 elements, recurse, and return the first N
// result elements.
1150 __ESIMD_NS::simd<T, N + 1> Src0, Src1, Src2;
1151 Src0.template select<N, 1>() =
src0;
1152 Src1.template select<N, 1>() =
src1;
1153 Src2.template select<N, 1>() = src2;
1154 auto Result = __ESIMD_NS::bfn<FuncControl>(Src0, Src1, Src2);
1155 return Result.template select<N, 1>();
/// Scalar overload (presumably of bfn(); the name line is absent from this
/// listing): enabled when T is an integral ESIMD scalar type. Each operand is
/// wrapped in a 1-element vector and the vector bfn is called.
// NOTE: embedded source lines 1169 and 1175+ (signature, return) are absent
// from this listing.
1166 template <bfn_t FuncControl,
typename T>
1167 ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
1168 __ESIMD_DNS::is_esimd_scalar<T>::value && std::is_integral_v<T>, T>
1170 __ESIMD_NS::simd<T, 1> Src0 =
src0;
1171 __ESIMD_NS::simd<T, 1> Src1 =
src1;
1172 __ESIMD_NS::simd<T, 1> Src2 = src2;
1173 __ESIMD_NS::simd<T, 1> Result =
1174 esimd::bfn<FuncControl, T, 1>(Src0, Src1, Src2);
/// addc(): add with carry of two unsigned 32-bit vectors.
/// The intrinsic returns a pair: its first member is written to the `carry`
/// output parameter and its second member is returned.
// NOTE: embedded source lines 1186, 1193 and 1196 (template header, closing
// brace) are absent from this listing.
1187 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1188 addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N>
src0,
1189 __ESIMD_NS::simd<uint32_t, N>
src1) {
1190 std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
1191 __ESIMD_DNS::vector_type_t<uint32_t, N>>
1192 Result = __esimd_addc<uint32_t, N>(
src0.data(),
src1.data());
1194 carry = Result.first;
1195 return Result.second;
/// addc() overload with a vector src0: src1 is first stored into an
/// N-element vector (Src1V).
// NOTE: embedded source lines 1204, 1207 and 1209+ (template header, the src1
// parameter declaration, the forwarding call) are absent from this listing;
// presumably src1 is a scalar here — confirm against the full source.
1205 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1206 addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N>
src0,
1208 __ESIMD_NS::simd<uint32_t, N> Src1V =
src1;
/// addc() overload with a scalar src0 and a vector src1: src0 is first stored
/// into an N-element vector (Src0V).
// NOTE: embedded source lines 1218 and 1223+ (template header, the forwarding
// call) are absent from this listing.
1219 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1220 addc(__ESIMD_NS::simd<uint32_t, N> &carry, uint32_t
src0,
1221 __ESIMD_NS::simd<uint32_t, N>
src1) {
1222 __ESIMD_NS::simd<uint32_t, N> Src0V =
src0;
/// addc(), scalar version: wraps carry, src0 and src1 in 1-element vectors
/// and calls the vector addc.
// NOTE: embedded source lines 1237+ (presumably the write-back to `carry` and
// the return) are absent from this listing.
1232 __ESIMD_API uint32_t
addc(uint32_t &carry, uint32_t
src0, uint32_t
src1) {
1233 __ESIMD_NS::simd<uint32_t, 1> CarryV = carry;
1234 __ESIMD_NS::simd<uint32_t, 1> Src0V =
src0;
1235 __ESIMD_NS::simd<uint32_t, 1> Src1V =
src1;
1236 __ESIMD_NS::simd<uint32_t, 1> Res =
addc(CarryV, Src0V, Src1V);
/// subb(): subtraction with borrow of two unsigned 32-bit vectors.
/// The intrinsic returns a pair: its first member is written to the `borrow`
/// output parameter and its second member is returned.
// NOTE: embedded source lines 1248, 1255 and 1258 (template header, closing
// brace) are absent from this listing.
1249 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1250 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N>
src0,
1251 __ESIMD_NS::simd<uint32_t, N>
src1) {
1252 std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
1253 __ESIMD_DNS::vector_type_t<uint32_t, N>>
1254 Result = __esimd_subb<uint32_t, N>(
src0.data(),
src1.data());
1256 borrow = Result.first;
1257 return Result.second;
/// subb() overload with a vector src0: src1 is first stored into an
/// N-element vector (Src1V).
// NOTE: embedded source lines 1267, 1270 and 1272+ (template header, the src1
// parameter declaration, the forwarding call) are absent from this listing;
// presumably src1 is a scalar here — confirm against the full source.
1268 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1269 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N>
src0,
1271 __ESIMD_NS::simd<uint32_t, N> Src1V =
src1;
/// subb() overload with a scalar src0 and a vector src1: src0 is first stored
/// into an N-element vector (Src0V).
// NOTE: embedded source lines 1282 and 1287+ (template header, the forwarding
// call) are absent from this listing.
1283 __ESIMD_API __ESIMD_NS::simd<uint32_t, N>
1284 subb(__ESIMD_NS::simd<uint32_t, N> &borrow, uint32_t
src0,
1285 __ESIMD_NS::simd<uint32_t, N>
src1) {
1286 __ESIMD_NS::simd<uint32_t, N> Src0V =
src0;
/// subb(), scalar version: wraps borrow, src0 and src1 in 1-element vectors,
/// calls the vector subb, and copies the resulting borrow back to the caller.
// NOTE: embedded source lines 1303+ (the return and closing brace) are absent
// from this listing.
1297 __ESIMD_API uint32_t
subb(uint32_t &borrow, uint32_t
src0, uint32_t
src1) {
1298 __ESIMD_NS::simd<uint32_t, 1> BorrowV = borrow;
1299 __ESIMD_NS::simd<uint32_t, 1> Src0V =
src0;
1300 __ESIMD_NS::simd<uint32_t, 1> Src1V =
src1;
1301 __ESIMD_NS::simd<uint32_t, 1> Res =
subb(BorrowV, Src0V, Src1V);
// Propagate the borrow computed by the vector version to the output argument.
1302 borrow = BorrowV[0];
/// rdtsc(): returns the value of the timestamp counter.
// NOTE: the closing brace (embedded line 1311) is absent from this listing.
1308 __ESIMD_API uint64_t
rdtsc() {
// The intrinsic result is held as four 32-bit values.
1309 __ESIMD_NS::simd<uint32_t, 4> retv = __esimd_timestamp();
// Reinterpret as 64-bit elements and return the first one.
1310 return retv.template bit_cast_view<uint64_t>()[0];
const auto & data() const noexcept
raw_vector_type data() const
This class represents a reference to a sub-region of a base simd object.
static constexpr int length
typename ShapeTy::element_type element_type
The element type of this class, which could be different from the element type of the base object type.
The main simd vector class.
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< std::is_integral< T >::value &&(sizeof(T)<=4), simd< uint32_t, N > > cbit(simd< T, N > src)
Count number of bits set in the source operand per element.
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), uint > pack_mask(simd_mask< N > src0)
Pack a simd_mask into a single unsigned 32-bit integer value.
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&(sizeof(T)==4), simd< T, N > > fbl(simd< T, N > src)
Find the per element number of the first bit set in the source operand starting from the least significant bit.
__ESIMD_API std::enable_if_t<(std::is_same_v< T, ushort >||std::is_same_v< T, uint >)&&(N > 0 &&N<=32), uint > ballot(simd< T, N > mask)
Compare source vector elements against zero and return a bitfield combining the comparison result.
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_signed< T >::value &&(sizeof(T)==4), simd< T, N > > fbh(simd< T, N > src)
Find the per element number of the first bit set in the source operand starting from the most significant bit.
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<(N==8||N==16||N==32), simd_mask< N > > unpack_mask(uint src0)
Unpack an unsigned 32-bit integer value into a simd_mask.
#define __ESIMD_INTRINSIC_DEF(name)
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndu(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-up (also known as ceil).
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndd(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-down (also known as floor).
__ESIMD_API sycl::ext::intel::esimd::simd< RT, SZ > trunc(const sycl::ext::intel::esimd::simd< float, SZ > &src0, Sat sat={})
Round to integral value using the round to zero rounding mode (vector version).
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > floor(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Floor" operation, vector version - alias of rndd.
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rnde(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-to-even (also known as round).
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, SZ > ceil(const sycl::ext::intel::esimd::simd< float, SZ > src0, Sat sat={})
"Ceiling" operation, vector version - alias of rndu.
__ESIMD_API sycl::ext::intel::esimd ::simd< T, SZ > rndz(sycl::ext::intel::esimd ::simd< float, SZ > src0, Sat sat={})
Round-to-zero (also known as trunc).
__ESIMD_API std::enable_if_t< std::is_integral_v< T >, sycl::ext::intel::esimd::simd< T, N > > bfn(sycl::ext::intel::esimd::simd< T, N > src0, sycl::ext::intel::esimd::simd< T, N > src1, sycl::ext::intel::esimd::simd< T, N > src2)
Performs binary function computation with three vector operands.
static constexpr bfn_t operator&(bfn_t x, bfn_t y)
bfn_t
This enum is used to encode all possible logical operations performed on the 3 input operands.
static constexpr bfn_t operator|(bfn_t x, bfn_t y)
static constexpr bfn_t operator~(bfn_t x)
static constexpr bfn_t operator^(bfn_t x, bfn_t y)
__ESIMD_API simd< T, N > cos(simd< T, N > src, Sat sat={})
Cosine.
__ESIMD_API simd< T, N > sqrt(simd< T, N > src, Sat sat={})
Square root.
__ESIMD_API simd< T, N > rsqrt(simd< T, N > src, Sat sat={})
Square root reciprocal - calculates 1/sqrt(x).
#define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname)
__ESIMD_API simd< T, N > pow(simd< T, N > src0, simd< U, N > src1, Sat sat={})
Power - calculates src0 in power of src1.
#define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname)
__ESIMD_API simd< T, N > inv(simd< T, N > src, Sat sat={})
Inversion - calculates (1/x).
__ESIMD_API simd< T, N > sqrt_ieee(simd< T, N > src, Sat sat={})
IEEE754-compliant square root. Supports float and double.
#define __ESIMD_EMATH_COND
__ESIMD_API simd< T, N > div_ieee(simd< T, N > src0, simd< U, N > src1, Sat sat={})
IEEE754-compliant floating-point division. Supports float and double.
__ESIMD_API simd< T, N > sin(simd< T, N > src, Sat sat={})
Sine.
#define __ESIMD_EMATH_IEEE_COND
__ESIMD_API simd< T, N > log2(simd< T, N > src, Sat sat={})
Logarithm base 2.
__ESIMD_API simd< T, N > exp2(simd< T, N > src, Sat sat={})
Exponent base 2.
ESIMD_INLINE ESIMD_NODEBUG T0 hmax(simd< T1, SZ > v)
Performs 'maximum' operation reduction over elements of the input vector.
__ESIMD_API SZ simd< T, SZ > src1
__ESIMD_API simd< T, SZ >(max)(simd< T
Selects component-wise the maximum of the two vectors.
ESIMD_DETAIL ESIMD_NODEBUG ESIMD_INLINE simd< T, SZ > log(simd< T, SZ > src0, Sat sat={})
Computes the natural logarithm of the given argument.
__ESIMD_API SZ simd< T, SZ > Sat sat
ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd< T1, SZ > v, BinaryOperation op)
Performs reduction over elements of the input vector.
__ESIMD_API SZ simd< T, SZ > Sat int class simd< T, SZ >() min(simd< T, SZ > src0, T src1, Sat sat={})
__ESIMD_API std::enable_if_t<!detail::is_generic_floating_point_v< T0 >||std::is_same_v< T1, T0 >, simd< T0, SZ > > saturate(simd< T1, SZ > src)
Conversion of input vector elements of type T1 into vector of elements of type T0 with saturation.
__ESIMD_API SZ simd< T, SZ > Sat int SZ
__ESIMD_API sycl::ext::intel::esimd::simd< uint32_t, N > addc(sycl::ext::intel::esimd::simd< uint32_t, N > &carry, sycl::ext::intel::esimd::simd< uint32_t, N > src0, sycl::ext::intel::esimd::simd< uint32_t, N > src1)
Performs add with carry of 2 unsigned 32-bit vectors.
__ESIMD_API SZ simd< T, SZ > Sat int class simd< T, SZ >() max(simd< T, SZ > src0, T src1, Sat sat={})
__ESIMD_API uint64_t rdtsc()
rdtsc - get the value of timestamp counter.
ESIMD_INLINE ESIMD_NODEBUG T0 hmin(simd< T1, SZ > v)
Performs 'minimum' operation reduction over elements of the input vector, that is,...
ESIMD_NODEBUG ESIMD_INLINE simd< T, SZ > exp(simd< T, SZ > src0, Sat sat={})
Computes e raised to the power of the given argument.
__ESIMD_API SZ simd< T, SZ > Sat int class Sat
__ESIMD_API sycl::ext::intel::esimd::simd< uint32_t, N > subb(sycl::ext::intel::esimd::simd< uint32_t, N > &borrow, sycl::ext::intel::esimd::simd< uint32_t, N > src0, sycl::ext::intel::esimd::simd< uint32_t, N > src1)
Performs subtraction with borrow of 2 unsigned 32-bit vectors.
__ESIMD_API std::enable_if_t< detail::is_dword_type< T1 >::value &&detail::is_dword_type< T2 >::value &&detail::is_dword_type< T3 >::value &&detail::is_dword_type< T4 >::value, simd< T1, N > > dp4a(simd< T2, N > src0, simd< T3, N > src1, simd< T4, N > src2, Sat sat={})
DP4A.
ESIMD_DETAIL __ESIMD_API std::enable_if_t< !std::is_same< std::remove_const_t< TRes >, std::remove_const_t< TArg > >::value, simd< TRes, SZ > > abs(simd< TArg, SZ > src0)
Get absolute value (vector version)
T0 reduce(simd< T1, SZ > v)
ESIMD_INLINE ESIMD_NODEBUG T0 prod(simd< T1, SZ > v)
T0 reduce_single(simd< T1, SZ > v)
ESIMD_INLINE ESIMD_NODEBUG T0 sum(simd< T1, SZ > v)
T0 reduce_pair(simd< T1, N1 > v1, simd< T1, N2 > v2)
constexpr ESIMD_INLINE bool isPowerOf2(unsigned int n)
Check if a given 32 bit positive integer is a power of 2 at compile time.
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)
simd< T0, SZ > operator()(simd< T1, SZ > v1, simd< T1, SZ > v2)