21 inline namespace _V1 {
22 namespace ext::intel::experimental::esimd {
37 template <
typename T0,
typename T1,
int SZ,
typename U,
38 class Sat = __ESIMD_NS::saturation_off_tag>
39 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
40 std::is_integral<T1>::value &&
41 std::is_integral<U>::value,
42 __ESIMD_NS::simd<T0, SZ>>
45 __ESIMD_DNS::computation_type_t<decltype(
src0), int32_t>;
46 ComputationTy Src0 =
src0;
47 ComputationTy Src1 =
src1;
49 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_on_tag>) {
50 if constexpr (std::is_unsigned<T0>::value) {
51 if constexpr (std::is_unsigned<
52 typename ComputationTy::element_type>::value)
53 return __esimd_uushl_sat<T0, typename ComputationTy::element_type, SZ>(
54 Src0.data(), Src1.data());
56 return __esimd_usshl_sat<T0, typename ComputationTy::element_type, SZ>(
57 Src0.data(), Src1.data());
59 if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
60 return __esimd_sushl_sat<T0, typename ComputationTy::element_type, SZ>(
61 Src0.data(), Src1.data());
63 return __esimd_ssshl_sat<T0, typename ComputationTy::element_type, SZ>(
64 Src0.data(), Src1.data());
67 if constexpr (std::is_unsigned<T0>::value) {
68 if constexpr (std::is_unsigned<
69 typename ComputationTy::element_type>::value)
70 return __esimd_uushl<T0, typename ComputationTy::element_type, SZ>(
71 Src0.data(), Src1.data());
73 return __esimd_usshl<T0, typename ComputationTy::element_type, SZ>(
74 Src0.data(), Src1.data());
76 if constexpr (std::is_signed<typename ComputationTy::element_type>::value)
77 return __esimd_sushl<T0, typename ComputationTy::element_type, SZ>(
78 Src0.data(), Src1.data());
80 return __esimd_ssshl<T0, typename ComputationTy::element_type, SZ>(
81 Src0.data(), Src1.data());
95 template <
typename T0,
typename T1,
typename T2,
96 class Sat = __ESIMD_NS::saturation_off_tag>
97 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
98 __ESIMD_DNS::is_esimd_scalar<T1>::value &&
99 __ESIMD_DNS::is_esimd_scalar<T2>::value &&
100 std::is_integral<T0>::value &&
101 std::is_integral<T1>::value &&
102 std::is_integral<T2>::value,
103 std::remove_const_t<T0>>
105 __ESIMD_NS::simd<T1, 1> Src0 =
src0;
106 __ESIMD_NS::simd<T0, 1> Result =
107 esimd::shl<T0, T1, 1, T2, Sat>(Src0,
src1,
sat);
121 template <
typename T0,
typename T1,
int SZ,
typename U,
122 class Sat = __ESIMD_NS::saturation_off_tag>
123 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
124 std::is_integral<T1>::value &&
125 std::is_integral<U>::value,
126 __ESIMD_NS::simd<T0, SZ>>
128 using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
129 typedef typename std::make_unsigned<IntermedTy>::type ComputationTy;
130 __ESIMD_NS::simd<ComputationTy, SZ> Src0 =
src0;
131 __ESIMD_NS::simd<ComputationTy, SZ> Src1 =
src1;
133 __ESIMD_NS::simd<ComputationTy, SZ> Result = Src0.data() >> Src1.data();
135 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
138 return __ESIMD_NS::saturate<T0>(Result);
151 template <
typename T0,
typename T1,
typename T2,
152 class Sat = __ESIMD_NS::saturation_off_tag>
153 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
154 __ESIMD_DNS::is_esimd_scalar<T1>::value &&
155 __ESIMD_DNS::is_esimd_scalar<T2>::value &&
156 std::is_integral<T0>::value &&
157 std::is_integral<T1>::value &&
158 std::is_integral<T2>::value,
159 std::remove_const_t<T0>>
161 __ESIMD_NS::simd<T1, 1> Src0 =
src0;
162 __ESIMD_NS::simd<T0, 1> Result =
163 esimd::lsr<T0, T1, 1, T2, Sat>(Src0,
src1,
sat);
178 template <
typename T0,
typename T1,
int SZ,
typename U,
179 class Sat = __ESIMD_NS::saturation_off_tag>
180 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
181 std::is_integral<T1>::value &&
182 std::is_integral<U>::value,
183 __ESIMD_NS::simd<T0, SZ>>
185 using IntermedTy = __ESIMD_DNS::computation_type_t<T1, T1>;
186 typedef typename std::make_signed<IntermedTy>::type ComputationTy;
187 __ESIMD_NS::simd<ComputationTy, SZ> Src0 =
src0;
189 __ESIMD_NS::simd<ComputationTy, SZ> Result = Src0 >>
src1;
191 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
194 return __ESIMD_NS::saturate<T0>(Result);
207 template <
typename T0,
typename T1,
typename T2,
208 class Sat = __ESIMD_NS::saturation_off_tag>
209 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
210 __ESIMD_DNS::is_esimd_scalar<T1>::value &&
211 __ESIMD_DNS::is_esimd_scalar<T2>::value &&
212 std::is_integral<T0>::value &&
213 std::is_integral<T1>::value &&
214 std::is_integral<T2>::value,
215 std::remove_const_t<T0>>
217 __ESIMD_NS::simd<T1, 1> Src0 =
src0;
218 __ESIMD_NS::simd<T0, 1> Result =
219 esimd::asr<T0, T1, 1, T2, Sat>(Src0,
src1,
sat);
233 template <
typename T0,
typename T1,
int SZ,
typename U,
234 class Sat = __ESIMD_NS::saturation_off_tag>
235 __ESIMD_API std::enable_if_t<std::is_integral<T0>::value &&
236 std::is_integral<T1>::value &&
237 std::is_integral<U>::value,
238 __ESIMD_NS::simd<T0, SZ>>
240 if constexpr (std::is_unsigned<T1>::value) {
241 return esimd::lsr<T0, T1, SZ, U, Sat>(
src0,
src1,
sat);
243 return esimd::asr<T0, T1, SZ, U, Sat>(
src0,
src1,
sat);
256 template <
typename T0,
typename T1,
typename T2,
257 class Sat = __ESIMD_NS::saturation_off_tag>
258 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
259 __ESIMD_DNS::is_esimd_scalar<T1>::value &&
260 __ESIMD_DNS::is_esimd_scalar<T2>::value &&
261 std::is_integral<T0>::value &&
262 std::is_integral<T1>::value &&
263 std::is_integral<T2>::value,
264 std::remove_const_t<T0>>
266 __ESIMD_NS::simd<T1, 1> Src0 =
src0;
267 __ESIMD_NS::simd<T0, 1> Result =
268 esimd::shr<T0, T1, 1, T2, Sat>(Src0,
src1,
sat);
280 template <
typename T0,
typename T1,
int SZ>
281 __ESIMD_API std::enable_if_t<
282 __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
284 __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
286 __ESIMD_NS::simd<T0, SZ>>
287 rol(__ESIMD_NS::simd<T1, SZ>
src0, __ESIMD_NS::simd<T1, SZ>
src1) {
288 return __esimd_rol<T0, T1, SZ>(
src0.data(),
src1.data());
299 template <
typename T0,
typename T1,
int SZ,
typename U>
300 __ESIMD_API std::enable_if_t<
301 __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
303 __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
305 __ESIMD_NS::detail::is_type<U, int16_t, uint16_t, int32_t, uint32_t,
307 __ESIMD_NS::simd<T0, SZ>>
309 __ESIMD_NS::simd<T1, SZ> Src1 =
src1;
310 return esimd::rol<T0>(
src0, Src1);
320 template <
typename T0,
typename T1,
typename T2>
321 __ESIMD_API std::enable_if_t<
322 __ESIMD_DNS::is_esimd_scalar<T0>::value &&
323 __ESIMD_DNS::is_esimd_scalar<T1>::value &&
324 __ESIMD_DNS::is_esimd_scalar<T2>::value &&
325 __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
327 __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
329 __ESIMD_NS::detail::is_type<T2, int16_t, uint16_t, int32_t, uint32_t,
331 std::remove_const_t<T0>>
333 __ESIMD_NS::simd<T1, 1> Src0 =
src0;
334 __ESIMD_NS::simd<T0, 1> Result = esimd::rol<T0, T1, 1, T2>(Src0,
src1);
346 template <
typename T0,
typename T1,
int SZ>
347 __ESIMD_API std::enable_if_t<
348 __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
350 __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
352 __ESIMD_NS::simd<T0, SZ>>
353 ror(__ESIMD_NS::simd<T1, SZ>
src0, __ESIMD_NS::simd<T1, SZ>
src1) {
354 return __esimd_ror<T0, T1, SZ>(
src0.data(),
src1.data());
365 template <
typename T0,
typename T1,
int SZ,
typename U>
366 __ESIMD_API std::enable_if_t<
367 __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
369 __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
371 __ESIMD_NS::detail::is_type<U, int16_t, uint16_t, int32_t, uint32_t,
373 __ESIMD_NS::simd<T0, SZ>>
375 __ESIMD_NS::simd<T1, SZ> Src1 =
src1;
376 return esimd::ror<T0>(
src0, Src1);
386 template <
typename T0,
typename T1,
typename T2>
387 __ESIMD_API std::enable_if_t<
388 __ESIMD_DNS::is_esimd_scalar<T0>::value &&
389 __ESIMD_DNS::is_esimd_scalar<T1>::value &&
390 __ESIMD_DNS::is_esimd_scalar<T2>::value &&
391 __ESIMD_NS::detail::is_type<T0, int16_t, uint16_t, int32_t, uint32_t,
393 __ESIMD_NS::detail::is_type<T1, int16_t, uint16_t, int32_t, uint32_t,
395 __ESIMD_NS::detail::is_type<T2, int16_t, uint16_t, int32_t, uint32_t,
397 std::remove_const_t<T0>>
399 __ESIMD_NS::simd<T1, 1> Src0 =
src0;
400 __ESIMD_NS::simd<T0, 1> Result = esimd::ror<T0, T1, 1, T2>(Src0,
src1);
414 template <
typename T,
typename T0,
typename T1,
int N>
415 __ESIMD_API __ESIMD_NS::simd<T, N>
imul_impl(__ESIMD_NS::simd<T, N> &rmd,
416 __ESIMD_NS::simd<T0, N>
src0,
417 __ESIMD_NS::simd<T1, N>
src1) {
418 static_assert(__ESIMD_DNS::is_dword_type<T>::value &&
419 __ESIMD_DNS::is_dword_type<T0>::value &&
420 __ESIMD_DNS::is_dword_type<T1>::value,
421 "expected 32-bit integer vector operands.");
422 using Comp32T = __ESIMD_DNS::computation_type_t<T0, T1>;
423 auto Src0 =
src0.template bit_cast_view<Comp32T>();
424 auto Src1 =
src1.template bit_cast_view<Comp32T>();
428 std::conditional_t<std::is_signed_v<Comp32T>,
int64_t, uint64_t>;
429 __ESIMD_NS::simd<Comp64T, N> Product64 = Src0;
433 auto Product32 = Product64.template bit_cast_view<T>();
434 if constexpr (N == 1) {
438 rmd = Product32.template select<N, 2>(0);
439 return Product32.template select<N, 2>(1);
447 template <
typename T,
typename T0,
typename T1,
int N>
448 __ESIMD_API __ESIMD_NS::simd<T, N>
imul(__ESIMD_NS::simd<T, N> &rmd,
449 __ESIMD_NS::simd<T0, N>
src0,
450 __ESIMD_NS::simd<T1, N>
src1) {
451 return imul_impl<T, T0, T1, N>(rmd,
src0,
src1);
459 template <
typename T,
typename T0,
typename T1,
int N>
460 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T1>::value,
461 __ESIMD_NS::simd<T, N>>
462 imul(__ESIMD_NS::simd<T, N> &rmd, __ESIMD_NS::simd<T0, N>
src0, T1
src1) {
463 __ESIMD_NS::simd<T1, N> Src1V =
src1;
464 return esimd::imul_impl<T, T0, T1, N>(rmd,
src0, Src1V);
472 template <
typename T,
typename T0,
typename T1,
int N>
473 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T0>::value,
474 __ESIMD_NS::simd<T, N>>
475 imul(__ESIMD_NS::simd<T, N> &rmd, T0
src0, __ESIMD_NS::simd<T1, N>
src1) {
476 __ESIMD_NS::simd<T0, N> Src0V =
src0;
477 return esimd::imul_impl<T, T0, T1, N>(rmd, Src0V,
src1);
484 template <
typename T,
typename T0,
typename T1>
485 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_dword_type<T>::value &&
486 __ESIMD_DNS::is_dword_type<T0>::value &&
487 __ESIMD_DNS::is_dword_type<T1>::value,
490 __ESIMD_NS::simd<T, 1> RmdV = rmd;
491 __ESIMD_NS::simd<T0, 1> Src0V =
src0;
492 __ESIMD_NS::simd<T1, 1> Src1V =
src1;
493 __ESIMD_NS::simd<T, 1> Res =
494 esimd::imul_impl<T, T0, T1, 1>(RmdV, Src0V, Src1V);
501 "Please use sycl::ext::intel::esimd::addc(carry, src0, src1);")
502 __ESIMD_API __ESIMD_NS::
simd<uint32_t, N>
addc(
503 __ESIMD_NS::
simd<uint32_t, N> &carry, __ESIMD_NS::
simd<uint32_t, N>
src0,
504 __ESIMD_NS::
simd<uint32_t, N>
src1) {
510 "Please use sycl::ext::intel::esimd::addc(carry, src0, src1);")
511 __ESIMD_API __ESIMD_NS::
simd<uint32_t, N>
addc(
512 __ESIMD_NS::
simd<uint32_t, N> &carry, __ESIMD_NS::
simd<uint32_t, N>
src0,
519 "Please use sycl::ext::intel::esimd::addc(carry, src0, src1);")
520 __ESIMD_API __ESIMD_NS::
simd<uint32_t, N>
addc(
521 __ESIMD_NS::
simd<uint32_t, N> &carry, uint32_t
src0,
522 __ESIMD_NS::
simd<uint32_t, N>
src1) {
527 "Please use sycl::ext::intel::esimd::addc(carry, src0, src1);")
528 __ESIMD_API uint32_t
addc(uint32_t &carry, uint32_t
src0, uint32_t
src1) {
534 "Please use sycl::ext::intel::esimd::subb(borrow, src0, src1);")
535 __ESIMD_API __ESIMD_NS::
simd<uint32_t, N>
subb(
536 __ESIMD_NS::
simd<uint32_t, N> &borrow, __ESIMD_NS::
simd<uint32_t, N>
src0,
537 __ESIMD_NS::
simd<uint32_t, N>
src1) {
543 "Please use sycl::ext::intel::esimd::subb(borrow, src0, src1);")
544 __ESIMD_API __ESIMD_NS::
simd<uint32_t, N>
subb(
545 __ESIMD_NS::
simd<uint32_t, N> &borrow, __ESIMD_NS::
simd<uint32_t, N>
src0,
552 "Please use sycl::ext::intel::esimd::subb(borrow, src0, src1);")
553 __ESIMD_API __ESIMD_NS::
simd<uint32_t, N>
subb(
554 __ESIMD_NS::
simd<uint32_t, N> &borrow, uint32_t
src0,
555 __ESIMD_NS::
simd<uint32_t, N>
src1) {
560 "Please use sycl::ext::intel::esimd::subb(borrow, src0, src1);")
561 __ESIMD_API uint32_t
subb(uint32_t &borrow, uint32_t
src0, uint32_t
src1) {
572 template <
typename T,
int SZ,
typename U>
575 std::enable_if_t<
std::is_integral<T>::value &&
std::is_integral<U>::value,
587 template <
typename T0,
typename T1>
589 __ESIMD_API
std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
590 __ESIMD_DNS::is_esimd_scalar<T1>::value &&
591 std::is_integral<T0>::value &&
592 std::is_integral<T1>::value,
604 template <
typename T,
int SZ,
typename U>
607 std::enable_if_t<
std::is_integral<T>::value &&
std::is_integral<U>::value,
619 template <
typename T0,
typename T1>
621 __ESIMD_API
std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
622 __ESIMD_DNS::is_esimd_scalar<T1>::value &&
623 std::is_integral<T0>::value &&
624 std::is_integral<T1>::value,
638 template <
typename T,
int SZ,
typename U>
640 __ESIMD_API
std::enable_if_t<
641 std::is_integral<T>::value &&
std::is_integral<U>::value,
657 template <
typename T,
int SZ,
typename U>
659 __ESIMD_API
std::enable_if_t<
660 std::is_integral<T>::value &&
std::is_integral<U>::value &&
661 __ESIMD_DNS::is_esimd_scalar<U>::value,
677 template <
typename RT,
typename T0,
typename T1>
679 __ESIMD_API
std::enable_if_t<
680 __ESIMD_DNS::is_esimd_scalar<RT>::value &&
681 __ESIMD_DNS::is_esimd_scalar<T0>::value &&
682 __ESIMD_DNS::is_esimd_scalar<T1>::value,
691 #if defined(ESIMD_GEN7_5) || defined(ESIMD_GEN8) || defined(ESIMD_GEN8_5) || \
692 defined(ESIMD_GEN9) || defined(ESIMD_GEN9_5)
704 template <
typename T0,
typename T1,
int SZ,
typename U,
705 class Sat = __ESIMD_NS::saturation_off_tag>
709 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
710 __ESIMD_NS::simd<float, SZ> Src0 =
src0;
711 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
712 __ESIMD_NS::simd<float, SZ> Result = __esimd_dp2(Src0.data(), Src1.data());
713 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
716 return __ESIMD_NS::saturate<T0>(Result);
729 template <
typename T0,
typename T1,
int SZ,
typename U,
730 class Sat = __ESIMD_NS::saturation_off_tag>
734 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
735 __ESIMD_NS::simd<float, SZ> Src0 =
src0;
736 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
737 __ESIMD_NS::simd<float, SZ> Result = __esimd_dp3(Src0.data(), Src1.data());
738 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
741 return __ESIMD_NS::saturate<T0>(Result);
754 template <
typename T0,
typename T1,
int SZ,
typename U,
755 class Sat = __ESIMD_NS::saturation_off_tag>
759 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
760 __ESIMD_NS::simd<float, SZ> Src0 =
src0;
761 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
762 __ESIMD_NS::simd<float, SZ> Result = __esimd_dp4(Src0.data(), Src1.data());
763 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
766 return __ESIMD_NS::saturate<T0>(Result);
779 template <
typename T0,
typename T1,
typename U,
int SZ,
780 class Sat = __ESIMD_NS::saturation_off_tag>
784 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
785 __ESIMD_NS::simd<float, SZ> Src0 =
src0;
786 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
787 __ESIMD_NS::simd<float, SZ> Result = __esimd_dph(Src0.data(), Src1.data());
788 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
791 return __ESIMD_NS::saturate<T0>(Result);
805 template <
typename RT,
typename T1,
typename T2,
int SZ,
806 class Sat = __ESIMD_NS::saturation_off_tag>
811 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
813 __ESIMD_NS::simd<float, 4> Src0 =
src0;
814 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
815 __ESIMD_NS::simd<float, SZ> Result = __esimd_line(Src0.data(), Src1.data());
817 __ESIMD_NS::simd<RT, SZ> Result;
818 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
821 return __ESIMD_NS::saturate<RT>(Result);
835 template <
typename RT,
typename T,
int SZ,
836 class Sat = __ESIMD_NS::saturation_off_tag>
838 __ESIMD_API __ESIMD_NS::
simd<RT,
SZ>
line(
float P,
float Q,
841 __ESIMD_NS::simd<float, 4> Src0 = P;
843 return esimd::line<RT>(Src0,
src1,
sat);
868 template <
typename T0,
typename T1,
int SZ,
typename U,
869 class Sat = __ESIMD_NS::saturation_off_tag>
871 __ESIMD_API
std::enable_if_t<
872 __ESIMD_DNS::is_fp_or_dword_type<T1>::value &&
873 std::is_floating_point<T1>::value &&
874 __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
875 std::is_floating_point<U>::value,
878 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
880 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
881 __ESIMD_NS::simd<float, SZ> Result;
883 for (
int i = 0; i <
SZ; i += 4) {
884 Result.select<4, 1>(i) =
src0[i] * Src1[i] +
src0[i + 1] * Src1[i + 1];
886 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
889 return __ESIMD_NS::saturate<T1>(Result);
902 template <
typename T0,
typename T1,
int SZ,
typename U,
903 class Sat = __ESIMD_NS::saturation_off_tag>
905 __ESIMD_API
std::enable_if_t<
906 __ESIMD_DNS::is_fp_or_dword_type<T1>::value &&
907 std::is_floating_point<T1>::value &&
908 __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
909 std::is_floating_point<U>::value,
912 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
914 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
915 __ESIMD_NS::simd<float, SZ> Result;
917 for (
int i = 0; i <
SZ; i += 4) {
918 Result.select<4, 1>(i) =
src0[i] * Src1[i] +
src0[i + 1] * Src1[i + 1] +
919 src0[i + 2] * Src1[i + 2];
921 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
924 return __ESIMD_NS::saturate<T1>(Result);
937 template <
typename T0,
typename T1,
int SZ,
typename U,
938 class Sat = __ESIMD_NS::saturation_off_tag>
940 __ESIMD_API
std::enable_if_t<
941 __ESIMD_DNS::is_fp_or_dword_type<T1>::value &&
942 std::is_floating_point<T1>::value &&
943 __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
944 std::is_floating_point<U>::value,
947 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
949 __ESIMD_NS::simd<T1, SZ> Src1 =
src1;
950 __ESIMD_NS::simd<float, SZ> Result;
952 for (
int i = 0; i <
SZ; i += 4) {
953 Result.select<4, 1>(i) =
src0[i] * Src1[i] +
src0[i + 1] * Src1[i + 1] +
954 src0[i + 2] * Src1[i + 2] +
955 src0[i + 3] * Src1[i + 3];
957 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
960 return __ESIMD_NS::saturate<T1>(Result);
973 template <
typename T,
typename U,
int SZ,
974 class Sat = __ESIMD_NS::saturation_off_tag>
977 std::enable_if_t<__ESIMD_DNS::is_fp_or_dword_type<T>::value &&
978 std::is_floating_point<T>::value &&
979 __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
980 std::is_floating_point<U>::value,
983 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
985 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
986 __ESIMD_NS::simd<float, SZ> Result;
988 for (
int i = 0; i <
SZ; i += 4) {
989 Result.select<4, 1>(i) =
src0[i] * Src1[i] +
src0[i + 1] * Src1[i + 1] +
990 src0[i + 2] * Src1[i + 2] + 1.0 * Src1[i + 3];
992 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
995 return __ESIMD_NS::saturate<T>(Result);
1008 template <
typename T,
int SZ,
class Sat = __ESIMD_NS::saturation_off_tag>
1011 std::enable_if_t<__ESIMD_DNS::is_fp_or_dword_type<T>::value &&
1012 std::is_floating_point<T>::value,
1016 static_assert(
SZ % 4 == 0,
"result size is not a multiple of 4");
1021 for (
int i = 0; i <
SZ; i += 4) {
1025 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
1028 return __ESIMD_NS::saturate<T>(Result);
1042 template <
typename T,
int SZ,
class Sat = __ESIMD_NS::saturation_off_tag>
1045 std::enable_if_t<__ESIMD_DNS::is_fp_or_dword_type<T>::value &&
1046 std::is_floating_point<T>::value,
1050 __ESIMD_NS::simd<T, 4> Src0 = P;
1052 return esimd::line<T>(Src0,
src1,
sat);
1063 template <
typename T,
int SZ>
1065 __ESIMD_NS::simd<float, SZ> Src0 =
src0;
1066 return __esimd_frc(Src0.data());
1075 __ESIMD_NS::simd<T, 1> Src0 =
src0;
1076 __ESIMD_NS::simd<T, 1> Result = esimd::frc<T>(Src0);
1081 template <
typename RT,
typename T0,
int SZ,
1082 class Sat = __ESIMD_NS::saturation_off_tag>
1083 __ESIMD_API __ESIMD_NS::simd<RT, SZ>
lzd(__ESIMD_NS::simd<T0, SZ>
src0,
1086 __ESIMD_NS::simd<__ESIMD_NS::uint, SZ> Src0 =
src0;
1087 return __esimd_lzd<__ESIMD_NS::uint, SZ>(Src0.data());
1090 template <
typename RT,
typename T0,
class Sat = __ESIMD_NS::saturation_off_tag>
1091 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<RT>::value &&
1092 __ESIMD_DNS::is_esimd_scalar<T0>::value,
1093 std::remove_const_t<RT>>
1095 __ESIMD_NS::simd<T0, 1> Src0 =
src0;
1096 __ESIMD_NS::simd<RT, 1> Result = esimd::lzd<RT>(Src0);
1101 #if defined(ESIMD_GEN7_5) || defined(ESIMD_GEN8) || defined(ESIMD_GEN8_5) || \
1102 defined(ESIMD_GEN9) || defined(ESIMD_GEN9_5)
1104 template <
int SZ,
typename U,
typename V,
1105 class Sat = __ESIMD_NS::saturation_off_tag>
1109 static_assert(
SZ >= 4 && (
SZ & 0x3) == 0,
1110 "vector size must be a multiple of 4");
1111 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
1112 __ESIMD_NS::simd<float, SZ> Src2 = src2;
1113 __ESIMD_NS::simd<float, SZ> Result =
1114 __esimd_lrp<SZ>(
src0.data(), Src1.data(), Src2.data());
1116 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
1119 return __ESIMD_NS::saturate<float>(Result);
1133 template <
typename T,
int SZ,
typename U,
typename V,
1134 class Sat = __ESIMD_NS::saturation_off_tag>
1137 std::enable_if_t<__ESIMD_DNS::is_fp_or_dword_type<T>::value &&
1138 std::is_floating_point<T>::value &&
1139 __ESIMD_DNS::is_fp_or_dword_type<U>::value &&
1140 std::is_floating_point<U>::value,
1145 __ESIMD_NS::simd<float, SZ> Src1 =
src1;
1146 __ESIMD_NS::simd<float, SZ> Src2 = src2;
1147 __ESIMD_NS::simd<float, SZ> Result;
1148 Result = Src1 *
src0 + Src2 * (1.0f -
src0);
1149 if constexpr (std::is_same_v<Sat, __ESIMD_NS::saturation_off_tag>)
1152 return __ESIMD_NS::saturate<T>(Result);
1162 template <
typename T0,
typename T1,
int SZ>
1164 __ESIMD_NS::simd<unsigned, SZ> Src0 =
src0;
1165 return __esimd_bfrev<unsigned>(Src0.data());
1169 template <
typename T0,
typename T1>
1170 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1171 __ESIMD_DNS::is_esimd_scalar<T1>::value,
1172 std::remove_const_t<T0>>
1174 __ESIMD_NS::simd<T1, 1> Src0 =
src0;
1175 __ESIMD_NS::simd<T0, 1> Result = esimd::bf_reverse<T0>(Src0);
1180 template <
typename T0,
typename T1,
int SZ,
typename U,
typename V,
typename W>
1182 std::enable_if_t<std::is_integral<T1>::value, __ESIMD_NS::simd<T0, SZ>>
1184 typedef typename __ESIMD_DNS::dword_type<T1> DT1;
1185 static_assert(std::is_integral<DT1>::value &&
sizeof(DT1) ==
sizeof(
int),
1186 "operand conversion failed");
1187 __ESIMD_NS::simd<DT1, SZ> Src0 =
src0;
1188 __ESIMD_NS::simd<DT1, SZ> Src1 =
src1;
1189 __ESIMD_NS::simd<DT1, SZ> Src2 = src2;
1190 __ESIMD_NS::simd<DT1, SZ> Src3 = src3;
1192 return __esimd_bfi<DT1>(Src0.data(), Src1.data(), Src2.data(), Src3.data());
1196 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4>
1197 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1198 __ESIMD_DNS::is_esimd_scalar<T4>::value,
1199 std::remove_const_t<T0>>
1201 __ESIMD_NS::simd<T4, 1> Src3 = src3;
1202 __ESIMD_NS::simd<T0, 1> Result = esimd::bf_insert<T0>(
src0,
src1, src2, Src3);
1207 template <
typename T0,
typename T1,
int SZ,
typename U,
typename V>
1209 std::enable_if_t<std::is_integral<T1>::value, __ESIMD_NS::simd<T0, SZ>>
1211 typedef typename __ESIMD_DNS::dword_type<T1> DT1;
1212 static_assert(std::is_integral<DT1>::value &&
sizeof(DT1) ==
sizeof(
int),
1213 "operand conversion failed");
1214 __ESIMD_NS::simd<DT1, SZ> Src0 =
src0;
1215 __ESIMD_NS::simd<DT1, SZ> Src1 =
src1;
1216 __ESIMD_NS::simd<DT1, SZ> Src2 = src2;
1218 return __esimd_sbfe<DT1>(Src0.data(), Src1.data(), Src2.data());
1222 template <
typename T0,
typename T1,
typename T2,
typename T3>
1223 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T0>::value &&
1224 __ESIMD_DNS::is_esimd_scalar<T3>::value,
1225 std::remove_const_t<T0>>
1227 __ESIMD_NS::simd<T3, 1> Src2 = src2;
1228 __ESIMD_NS::simd<T0, 1> Result = esimd::bf_extract<T0>(
src0,
src1, Src2);
1238 template <
int SZ,
typename U,
class Sat = __ESIMD_NS::saturation_off_tag>
1239 __ESIMD_API __ESIMD_NS::simd<float, SZ>
1249 constexpr
double __ESIMD_CONST_PI = 3.1415926535897932384626433832795;
1253 template <
typename T,
int SZ>
1255 static_assert(std::is_floating_point<T>::value,
1256 "Floating point argument type is expected.");
1262 __ESIMD_NS::simd_mask<SZ> Gt1 = Src0 > T(1.0);
1264 sign.merge(OneN, OneP,
src0 < 0);
1272 (Src0P4 * T(0.185696) + ((Src0 * T(0.787997) + T(0.63693)) * Src0P2) +
1274 (((((Src0 * -T(0.000121387) + T(0.00202308)) * Src0P2) +
1275 (Src0 * -T(0.0149145)) + T(0.182569)) *
1277 ((Src0 * T(0.395889) + T(1.12158)) * Src0P2) + (Src0 * T(0.636918)) +
1280 Result.merge(Result - T(detail::__ESIMD_CONST_PI) / T(2.0), Gt1);
1286 static_assert(std::is_floating_point<T>::value,
1287 "Floating point argument type is expected.");
1288 __ESIMD_NS::simd<T, 1> Src0 =
src0;
1289 __ESIMD_NS::simd<T, 1> Result =
esimd::atan(Src0);
1295 template <
typename T,
int SZ>
1301 __ESIMD_NS::simd_mask<SZ> Neg =
src0 < T(0.0);
1302 __ESIMD_NS::simd_mask<SZ> TooBig = Src0 >= T(0.999998);
1306 Src0.merge(T(0.0), TooBig);
1314 (((Src01m * T(0.015098965761299077) - T(0.005516443930088506)) * Src0P4) +
1315 ((Src01m * T(0.047654245891495528) + T(0.163910606547823220)) * Src0P2) +
1316 Src01m * T(2.000291665285952400) - T(0.000007239283986332)) *
1319 Result.merge(T(0.0), TooBig);
1320 Result.merge(T(detail::__ESIMD_CONST_PI) - Result, Neg);
1324 template <
typename T>
1325 __ESIMD_API std::enable_if_t<std::is_floating_point<T>::value, T>
acos(T
src0) {
1326 __ESIMD_NS::simd<T, 1> Src0 =
src0;
1327 __ESIMD_NS::simd<T, 1> Result =
esimd::acos(Src0);
1333 template <
typename T,
int SZ>
1337 __ESIMD_NS::simd_mask<SZ> Neg =
src0 < T(0.0);
1342 Result.merge(-Result, Neg);
1346 template <
typename T>
1347 __ESIMD_API std::enable_if_t<std::is_floating_point<T>::value, T>
asin(T
src0) {
1348 __ESIMD_NS::simd<T, 1> Src0 =
src0;
1349 __ESIMD_NS::simd<T, 1> Result =
esimd::asin(Src0);
1360 __ESIMD_NS::simd<float, N>
atan2_fast(__ESIMD_NS::simd<float, N> y,
1361 __ESIMD_NS::simd<float, N> x);
1368 __ESIMD_NS::simd<float, N>
atan2(__ESIMD_NS::simd<float, N> y,
1369 __ESIMD_NS::simd<float, N> x);
1371 template <
typename T>
float atan2(T y, T x);
1376 __ESIMD_NS::simd<float, N>
fmod(__ESIMD_NS::simd<float, N> y,
1377 __ESIMD_NS::simd<float, N> x);
1379 template <
typename T>
float fmod(T y, T x);
1384 __ESIMD_NS::simd<float, N>
sin_emu(__ESIMD_NS::simd<float, N> x);
1391 __ESIMD_NS::simd<float, N>
cos_emu(__ESIMD_NS::simd<float, N> x);
1401 __ESIMD_NS::simd<float, N>
tanh_cody_waite(__ESIMD_NS::simd<float, N> x);
1404 float tanh(
float x);
1406 template <
int N> __ESIMD_NS::simd<float, N>
tanh(__ESIMD_NS::simd<float, N> x);
1413 ESIMD_INLINE __ESIMD_NS::simd<float, N>
1414 atan2_fast(__ESIMD_NS::simd<float, N> y, __ESIMD_NS::simd<float, N> x) {
1416 constexpr
float CONST_DBL_EPSILON = 0.00001f;
1417 __ESIMD_NS::simd<float, N> OneP(1.0f);
1418 __ESIMD_NS::simd<float, N> OneN(-1.0f);
1419 __ESIMD_NS::simd<float, N> sign;
1420 __ESIMD_NS::simd<float, N>
atan2;
1421 __ESIMD_NS::simd<float, N>
r;
1422 __ESIMD_NS::simd_mask<N> mask =
x < 0;
1425 r.merge((
x + abs_y) / (abs_y -
x), (
x - abs_y) / (
x + abs_y), mask);
1426 atan2.merge(
float(detail::__ESIMD_CONST_PI) * 0.75f,
1427 float(detail::__ESIMD_CONST_PI) * 0.25f, mask);
1428 atan2 += (0.1963f *
r *
r - 0.9817f) *
r;
1430 sign.merge(OneN, OneP,
y < 0);
1432 return atan2 * sign;
1437 __ESIMD_NS::simd<float, 1> vy =
y;
1438 __ESIMD_NS::simd<float, 1> vx =
x;
1446 ESIMD_INLINE __ESIMD_NS::simd<float, N>
atan2(__ESIMD_NS::simd<float, N> y,
1447 __ESIMD_NS::simd<float, N> x) {
1448 __ESIMD_NS::simd<float, N>
atan2;
1449 __ESIMD_NS::simd_mask<N> mask;
1452 constexpr
float CONST_DBL_EPSILON = 0.00001f;
1455 atan2.merge(
float(-detail::__ESIMD_CONST_PI) / 2.f, 0.f, mask);
1457 atan2.merge(
float(detail::__ESIMD_CONST_PI) / 2.f, mask);
1458 mask = (
x < -CONST_DBL_EPSILON &&
y < -CONST_DBL_EPSILON);
1459 atan2.merge(
atan -
float(detail::__ESIMD_CONST_PI), mask);
1460 mask = (
x < -CONST_DBL_EPSILON &&
y >= -CONST_DBL_EPSILON);
1461 atan2.merge(
atan +
float(detail::__ESIMD_CONST_PI), mask);
1462 mask = (
x > CONST_DBL_EPSILON);
1469 template <> ESIMD_INLINE
float atan2(
float y,
float x) {
1470 __ESIMD_NS::simd<float, 1> vy =
y;
1471 __ESIMD_NS::simd<float, 1> vx =
x;
1479 ESIMD_INLINE __ESIMD_NS::simd<float, N>
fmod(__ESIMD_NS::simd<float, N> y,
1480 __ESIMD_NS::simd<float, N> x) {
1484 auto fmod_sign_mask = (
y.template bit_cast_view<int32_t>()) & 0x80000000;
1486 __ESIMD_NS::simd<float, N> reminder =
1487 abs_y - abs_x * __ESIMD_NS::trunc<float>(abs_y / abs_x);
1489 abs_x.merge(0.0f, reminder >= 0);
1490 __ESIMD_NS::simd<float, N>
fmod = reminder + abs_x;
1494 (fmod_abs.template bit_cast_view<int32_t>()) | fmod_sign_mask;
1495 return fmod_bits.template bit_cast_view<float>();
1499 template <> ESIMD_INLINE
float fmod(
float y,
float x) {
1500 return fmod(__ESIMD_NS::simd<float, 1>(
y), __ESIMD_NS::simd<float, 1>(
x))[0];
1506 ESIMD_INLINE __ESIMD_NS::simd<float, N>
sin_emu(__ESIMD_NS::simd<float, N> x) {
1507 __ESIMD_NS::simd<float, N> x1;
1508 __ESIMD_NS::simd<float, N> x2;
1509 __ESIMD_NS::simd<float, N> t3;
1511 __ESIMD_NS::simd<float, N> sign;
1512 __ESIMD_NS::simd<float, N> fTrig;
1513 __ESIMD_NS::simd<float, N> TwoPI(
float(detail::__ESIMD_CONST_PI) * 2.0f);
1514 __ESIMD_NS::simd<float, N> CmpI((
float)detail::__ESIMD_CONST_PI);
1515 __ESIMD_NS::simd<float, N> OneP(1.0f);
1516 __ESIMD_NS::simd<float, N> OneN(-1.0f);
1519 x.merge(TwoPI +
x,
x < 0);
1521 x1.merge(CmpI -
x,
x - CmpI, (
x <=
float(detail::__ESIMD_CONST_PI)));
1522 x1.merge(
x, (
x <=
float(detail::__ESIMD_CONST_PI) * 0.5f));
1523 x1.merge(TwoPI -
x, (
x >
float(detail::__ESIMD_CONST_PI) * 1.5f));
1525 sign.merge(OneN, OneP, (
x >
float(detail::__ESIMD_CONST_PI)));
1528 t3 = x2 * x1 * 0.1666667f;
1531 x1 + t3 * (OneN + x2 * 0.05f *
1532 (OneP + x2 * 0.0238095f *
1533 (OneN + x2 * 0.0138889f *
1534 (OneP - x2 * 0.0090909f))));
1540 template <> ESIMD_INLINE
float sin_emu(
float x0) {
1547 ESIMD_INLINE __ESIMD_NS::simd<float, N>
cos_emu(__ESIMD_NS::simd<float, N> x) {
1552 template <> ESIMD_INLINE
float cos_emu(
float x0) {
1560 ESIMD_INLINE __ESIMD_NS::simd<float, N>
1561 tanh_cody_waite_impl(__ESIMD_NS::simd<float, N> x) {
1572 constexpr
float p0 = -0.8237728127E+00f;
1573 constexpr
float p1 = -0.3831010665E-02f;
1574 constexpr
float q0 = 0.2471319654E+01f;
1575 constexpr
float q1 = 1.0000000000E+00f;
1576 constexpr
float xsmall = 4.22863966691620432990E-04f;
1577 constexpr
float xmedium = 0.54930614433405484570f;
1578 constexpr
float xlarge = 8.66433975699931636772f;
1580 using RT = __ESIMD_NS::simd<float, N>;
1586 sign.merge(-1.f, 1.f, x < 0.f);
1588 auto isLarge = absX > xlarge;
1589 auto minor = absX <= xlarge;
1590 auto isGtMed = minor & (absX > xmedium);
1591 auto isGtSmall = (absX > xsmall) & (absX <= xmedium);
1594 res.merge(sign, x, isLarge);
1596 temp = ((temp - 2.f) / temp) * sign;
1597 res.merge(temp, isGtMed);
1598 res.merge((absX + absX * g * (g * p1 + p0) / (g + q0)) * sign, isGtSmall);
1604 ESIMD_INLINE __ESIMD_NS::simd<float, N>
1605 tanh_impl(__ESIMD_NS::simd<float, N> x) {
1612 constexpr
float xsmall = 0.000045f;
1613 constexpr
float xlarge = 40.f;
1615 using RT = __ESIMD_NS::simd<float, N>;
1620 sign.merge(-1.f, 1.f, x < 0.f);
1622 auto isLarge = (absX > xlarge);
1623 auto isLessE = (absX <= xlarge);
1626 res.merge(sign, x, isLarge);
1631 res.merge(((
exp - 1.f) / (
exp + 1.f)) * sign, (absX > xsmall) & isLessE);
1641 return detail::tanh_cody_waite_impl(__ESIMD_NS::simd<float, 1>(
x))[0];
1645 ESIMD_INLINE __ESIMD_NS::simd<float, N>
1647 return detail::tanh_cody_waite_impl(
x);
1653 return esimd::detail::tanh_impl(__ESIMD_NS::simd<float, 1>(
x))[0];
1657 ESIMD_INLINE __ESIMD_NS::simd<float, N>
tanh(__ESIMD_NS::simd<float, N> x) {
1658 return esimd::detail::tanh_impl(
x);
1661 template <
typename T,
int N>
1662 __ESIMD_NS::simd<T, N>
dp4(__ESIMD_NS::simd<T, N> v1,
1663 __ESIMD_NS::simd<T, N> v2) {
1664 auto retv = __esimd_dp4<T, N>(v1.data(), v2.data());
1676 ESIMD_INLINE __ESIMD_NS::simd<sycl::half, N>
1677 srnd(__ESIMD_NS::simd<float, N>
src0, __ESIMD_NS::simd<uint16_t, N>
src1) {
1678 return __esimd_srnd<N>(
src0.data(),
src1.data());
1701 template <bfn_t FuncControl,
typename T,
int N>
1703 "Please use sycl::ext::intel::esimd::bfn<FuncControl>(src0, src1, src2);")
1704 __ESIMD_API
std::enable_if_t<
std::is_integral_v<T>, __ESIMD_NS::
simd<T, N>>
bfn(
1706 __ESIMD_NS::
simd<T, N> src2) {
1707 return __ESIMD_NS::bfn<FuncControl>(
src0,
src1, src2);
1717 template <bfn_t FuncControl,
typename T>
1719 "Please use sycl::ext::intel::esimd::bfn<FuncControl>(src0, src1, src2);")
1720 __ESIMD_API
std::enable_if_t<__ESIMD_DNS::is_esimd_scalar<T>::value &&
1721 std::is_integral_v<T>,
1723 return __ESIMD_NS::bfn<FuncControl>(
src0,
src1, src2);
1738 template <
typename T,
int N>
1739 ESIMD_INLINE __ESIMD_NS::simd<T, N>
fma(__ESIMD_NS::simd<T, N> a,
1740 __ESIMD_NS::simd<T, N> b,
1741 __ESIMD_NS::simd<T, N> c) {
1742 static_assert(__ESIMD_DNS::is_generic_floating_point_v<T>,
1743 "fma only supports floating point types");
1744 using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
1745 auto Ret = __esimd_fmadd<__ESIMD_DNS::__raw_t<CppT>, N>(
1746 __ESIMD_DNS::convert_vector<CppT, T, N>(
a.data()),
1747 __ESIMD_DNS::convert_vector<CppT, T, N>(
b.data()),
1748 __ESIMD_DNS::convert_vector<CppT, T, N>(c.data()));
1749 return __ESIMD_DNS::convert_vector<T, CppT, N>(Ret);
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > lsr(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Logical Shift Right (vector version)
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&sycl::ext::intel::esimd::detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), sycl::ext::intel::esimd::simd< T0, SZ > > ror(sycl::ext::intel::esimd::simd< T1, SZ > src0, sycl::ext::intel::esimd::simd< T1, SZ > src1)
Rotate right operation with two vector inputs.
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::detail::is_type< T0, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >) &&sycl::ext::intel::esimd::detail::is_type< T1, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t >), sycl::ext::intel::esimd::simd< T0, SZ > > rol(sycl::ext::intel::esimd::simd< T1, SZ > src0, sycl::ext::intel::esimd::simd< T1, SZ > src1)
Rotate left operation with two vector inputs.
__ESIMD_API std::enable_if_t< std::is_integral< T1 >::value, sycl::ext::intel::esimd::simd< T0, SZ > > bf_insert(U src0, V src1, W src2, sycl::ext::intel::esimd::simd< T1, SZ > src3)
bf_insert
__ESIMD_API std::enable_if_t< std::is_integral< T1 >::value, sycl::ext::intel::esimd::simd< T0, SZ > > bf_extract(U src0, V src1, sycl::ext::intel::esimd::simd< T1, SZ > src2)
bf_extract
__ESIMD_API sycl::ext::intel::esimd::simd< T0, SZ > bf_reverse(sycl::ext::intel::esimd::simd< T1, SZ > src0)
bf_reverse
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > asr(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Arithmetical Shift Right (vector version)
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > shl(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Shift left operation (vector version)
__ESIMD_API std::enable_if_t< std::is_integral< T0 >::value &&std::is_integral< T1 >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > shr(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Shift right operation (vector version)
ESIMD_INLINE sycl::ext::intel::esimd::simd< T, N > fma(sycl::ext::intel::esimd::simd< T, N > a, sycl::ext::intel::esimd::simd< T, N > b, sycl::ext::intel::esimd::simd< T, N > c)
Performs a fused multiply add computation with three vector operands.
ESIMD_INLINE uint64_t rdtsc()
rdtsc - get the value of timestamp counter.
bfn_t
This enum is used to encode all possible logical operations performed on the 3 input operands.
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t< __ESIMD_DNS::is_esimd_scalar< T >::value &&std::is_integral_v< T >, T > bfn(T src0, T src1, T src2)
Performs binary function computation with three scalar operands.
__ESIMD_API T cos(T src, Sat sat={})
Scalar version.
__ESIMD_API T sin(T src, Sat sat={})
Scalar version.
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > imul_impl(sycl::ext::intel::esimd::simd< T, N > &rmd, sycl::ext::intel::esimd::simd< T0, N > src0, sycl::ext::intel::esimd::simd< T1, N > src1)
Computes the 64-bit result of two 32-bit element vectors src0 and src1 multiplication.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T1 >::value &&std::is_floating_point< T1 >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > dp4(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Dot product on groups of 4 elements.
__ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1)
Performs add with carry of a unsigned 32-bit scalars.
__ESIMD_API std::enable_if_t< std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > acos(sycl::ext::intel::esimd::simd< T, SZ > src0)
ESIMD_INLINE sycl::ext::intel::esimd::simd< sycl::half, N > srnd(sycl::ext::intel::esimd::simd< float, N > src0, sycl::ext::intel::esimd::simd< uint16_t, N > src1)
srnd - perform stochastic rounding.
__ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1)
Performs substraction with borrow of 2 unsigned 32-bit scalars.
__ESIMD_API SZ simd< T, SZ > Sat sat
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T1 >::value &&std::is_floating_point< T1 >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > dp3(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Dot product on groups of 4 elements.
sycl::ext::intel::esimd::simd< float, N > cos_emu(sycl::ext::intel::esimd::simd< float, N > x)
__ESIMD_API sycl::ext::intel::esimd::simd< RT, SZ > lzd(sycl::ext::intel::esimd::simd< T0, SZ > src0, Sat sat={})
sycl::ext::intel::esimd::simd< float, N > sin_emu(sycl::ext::intel::esimd::simd< float, N > x)
__SYCL_DEPRECATED("Please use sycl::ext::intel::esimd::addc(carry, src0, src1);") __ESIMD_API sycl
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > div(sycl::ext::intel::esimd::simd< T, SZ > &remainder, sycl::ext::intel::esimd::simd< T, SZ > src0, U src1)
Integral division with a vector dividend and a scalar divisor.
__ESIMD_API sycl::ext::intel::esimd::simd< float, SZ > sincos(sycl::ext::intel::esimd::simd< float, SZ > &dstcos, U src0, Sat sat={})
__ESIMD_API SZ simd< T, SZ > Sat int SZ
ESIMD_DETAIL __ESIMD_API sycl::ext::intel::esimd::simd< T, SZ > atan(sycl::ext::intel::esimd::simd< T, SZ > src0)
__ESIMD_API simd< T, SZ >(min)(simd< T
Selects component-wise the minimum of the two vectors.
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > quot(sycl::ext::intel::esimd::simd< T, SZ > src0, U src1)
Integral quotient (vector version)
sycl::ext::intel::esimd::simd< float, N > fmod(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
sycl::ext::intel::esimd::simd< float, N > atan2_fast(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
__ESIMD_API std::enable_if_t< std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > asin(sycl::ext::intel::esimd::simd< T, SZ > src0)
ESIMD_NODEBUG ESIMD_INLINE T exp(T src0, Sat sat={})
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T >::value &&std::is_floating_point< T >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > dph(sycl::ext::intel::esimd::simd< T, SZ > src0, U src1, Sat sat={})
Dot product on groups of 4 elements.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T >::value &&std::is_floating_point< T >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > lrp(sycl::ext::intel::esimd::simd< T, SZ > src0, U src1, V src2, Sat sat={})
__ESIMD_API sycl::ext::intel::esimd::simd< T, SZ > frc(sycl::ext::intel::esimd::simd< T, SZ > src0)
Performs component-wise truncate-to-minus-infinity fraction operation of src0.
__ESIMD_API SZ simd< T, SZ > Sat int class Sat
float tanh_cody_waite(float x)
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T1 >::value &&std::is_floating_point< T1 >::value &&__ESIMD_DNS::is_fp_or_dword_type< U >::value &&std::is_floating_point< U >::value, sycl::ext::intel::esimd::simd< T0, SZ > > dp2(sycl::ext::intel::esimd::simd< T1, SZ > src0, U src1, Sat sat={})
Dot product on groups of 4 elements.
__ESIMD_API std::enable_if_t< detail::is_esimd_scalar< T1 >::value, std::remove_const_t< T1 > > abs(T1 src0)
Get absolute value (scalar version).
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > imul(sycl::ext::intel::esimd::simd< T, N > &rmd, sycl::ext::intel::esimd::simd< T0, N > src0, sycl::ext::intel::esimd::simd< T1, N > src1)
Computes the 64-bit multiply result of two 32-bit integer vectors src0 and src1.
sycl::ext::intel::esimd::simd< float, N > atan2(sycl::ext::intel::esimd::simd< float, N > y, sycl::ext::intel::esimd::simd< float, N > x)
__ESIMD_API std::enable_if_t< __ESIMD_DNS::is_fp_or_dword_type< T >::value &&std::is_floating_point< T >::value, sycl::ext::intel::esimd::simd< T, SZ > > line(sycl::ext::intel::esimd::simd< T, 4 > src0, sycl::ext::intel::esimd::simd< T, SZ > src1, Sat sat={})
Linear equation.
__ESIMD_API std::enable_if_t< std::is_integral< T >::value &&std::is_integral< U >::value, sycl::ext::intel::esimd::simd< T, SZ > > mod(sycl::ext::intel::esimd::simd< T, SZ > src0, U src1)
Modulo (vector version)
conditional< sizeof(long)==8, long, long long >::type int64_t
std::enable_if_t< std::is_same_v< Tp, sycl::half2 >, sycl::half2 > inv(Tp x)
std::enable_if_t< std::is_same_v< Tp, sycl::half2 >, sycl::half2 > rsqrt(Tp x)
ESIMD_NODEBUG ESIMD_INLINE sycl::ext::intel::esimd::simd< float, SZ > exp(sycl::ext::intel::esimd::simd< float, SZ > x) __NOEXC