20 inline namespace _V1 {
21 namespace ext::intel {
22 namespace experimental::esimd {
30 __esimd_sbarrier(flag);
62 template <
typename T1,
int n1,
typename T2,
int n2,
typename T3,
int n3,
64 __ESIMD_API __ESIMD_NS::simd<T1, n1>
65 raw_sends(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
66 __ESIMD_NS::simd<T3, n3> msgSrc1, uint32_t exDesc, uint32_t msgDesc,
67 uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
68 uint8_t numDst, uint8_t isEOT = 0, uint8_t isSendc = 0,
69 __ESIMD_NS::simd_mask<N> mask = 1) {
70 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
71 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send rspVar");
72 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
73 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msgSrc0");
74 constexpr
unsigned _Width3 = n3 *
sizeof(T3);
75 static_assert(_Width3 % 32 == 0,
"Invalid size for raw send msgSrc1");
77 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
78 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
79 using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
81 uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
82 return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, N>(
83 modifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid, exDesc,
84 msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
108 template <
typename T1,
int n1,
typename T2,
int n2,
int N = 16>
109 __ESIMD_API __ESIMD_NS::simd<T1, n1>
110 raw_send(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
111 uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
112 uint8_t numSrc0, uint8_t numDst, uint8_t isEOT = 0,
113 uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
114 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
115 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send rspVar");
116 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
117 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msgSrc0");
119 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
120 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
122 uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
123 return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, N>(
124 modifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc, msgDesc,
125 msgSrc0.data(), msgDst.data());
148 template <
typename T1,
int n1,
typename T2,
int n2,
int N = 16>
150 raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0, __ESIMD_NS::simd<T2, n2> msgSrc1,
151 uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
152 uint8_t numSrc0, uint8_t numSrc1, uint8_t isEOT = 0,
153 uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
154 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
155 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send msgSrc0");
156 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
157 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msgSrc1");
159 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
160 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
162 uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
163 __esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, N>(
164 modifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc, msgDesc,
165 msgSrc0.data(), msgSrc1.data());
186 template <
typename T1,
int n1,
int N = 16>
188 raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
189 uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0,
190 uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
191 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
192 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send msgSrc0");
193 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
194 uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
195 __esimd_raw_send2_noresult<ElemT1, n1, N>(modifier, execSize, mask.data(),
196 numSrc0, sfid, exDesc, msgDesc,
211 template <
typename T,
int N>
212 __ESIMD_API std::enable_if_t<(
sizeof(T) * N >= 2)>
213 wait(__ESIMD_NS::simd<T, N> value) {
214 #ifdef __SYCL_DEVICE_ONLY__
215 uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
223 template <
typename T,
typename RegionT>
224 __ESIMD_API std::enable_if_t<
226 wait(__ESIMD_NS::simd_view<T, RegionT> value) {
227 #ifdef __SYCL_DEVICE_ONLY__
228 uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
243 template <
typename T,
int NBlocks,
int Height,
int Width,
bool Transposed,
247 Transposed, Transformed>();
251 template <
typename RT,
typename T,
int N>
252 ESIMD_INLINE __ESIMD_NS::simd<RT, N>
254 return __ESIMD_DNS::lsc_format_input<RT, T, N>(Vals);
258 template <
typename T,
typename T1,
int N>
259 ESIMD_INLINE __ESIMD_NS::simd<T, N>
261 return __ESIMD_DNS::lsc_format_ret<T, T1, N>(Vals);
275 static_assert(
true,
"Unsupported data type.");
279 template <cache_h
int L1H = cache_h
int::none, cache_h
int L2H = cache_h
int::none>
281 if constexpr (L1H == cache_hint::read_invalidate &&
282 L2H == cache_hint::cached) {
285 if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
288 if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
291 if constexpr (L1H == cache_hint::cached && L2H == cache_hint::cached) {
294 if constexpr (L1H == cache_hint::cached && L2H == cache_hint::uncached) {
297 if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
300 if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
306 template <cache_h
int L1H = cache_h
int::none, cache_h
int L2H = cache_h
int::none>
308 if constexpr (L1H == cache_hint::write_back && L2H == cache_hint::cached) {
311 if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
314 if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
317 if constexpr (L1H == cache_hint::write_through && L2H == cache_hint::cached) {
320 if constexpr (L1H == cache_hint::write_through &&
321 L2H == cache_hint::uncached) {
324 if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
327 if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
350 template <
typename T,
int NElts = 1,
352 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
354 __ESIMD_NS::simd_mask<N> pred = 1) {
355 __ESIMD_NS::simd<T, N * NElts> pass_thru;
356 return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
376 template <
typename T,
int NElts = 1,
378 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
380 __ESIMD_NS::simd_mask<N> pred,
381 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
382 return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
400 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
401 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
402 __ESIMD_API __ESIMD_NS::simd<T, NElts>
404 FlagsT flags = FlagsT{}) {
406 FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
407 return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, Props);
427 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
428 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
429 __ESIMD_API __ESIMD_NS::simd<T, NElts>
431 __ESIMD_NS::simd<T, NElts> pass_thru) {
433 FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
434 return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, pass_thru, Props);
455 template <
typename T,
int NElts = 1,
458 int N,
typename Toffset>
459 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
461 __ESIMD_NS::simd_mask<N> pred = 1) {
462 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
463 __ESIMD_NS::simd<T, N * NElts> PassThru;
464 return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
488 template <
typename T,
int NElts = 1,
491 int N,
typename Toffset>
492 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
494 __ESIMD_NS::simd_mask<N> pred,
495 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
496 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
497 return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
501 template <
typename T,
int NElts = 1,
504 int N,
typename OffsetObjT,
typename RegionTy>
505 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
506 lsc_gather(
const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
507 __ESIMD_NS::simd_mask<N> pred = 1) {
508 return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
511 template <
typename T,
int NElts = 1,
514 int N,
typename OffsetObjT,
typename RegionTy>
515 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
516 lsc_gather(
const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
517 __ESIMD_NS::simd_mask<N> pred,
518 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
519 return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred,
523 template <
typename T,
int NElts = 1,
526 int N,
typename Toffset>
527 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
528 __ESIMD_NS::simd<T, N * NElts>>
529 lsc_gather(
const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
530 return lsc_gather<T, NElts, DS, L1H, L2H, N>(
531 p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
534 template <
typename T,
int NElts = 1,
537 int N,
typename Toffset>
538 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
539 __ESIMD_NS::simd<T, N * NElts>>
540 lsc_gather(
const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred,
541 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
542 return lsc_gather<T, NElts, DS, L1H, L2H, N>(
543 p, __ESIMD_NS::simd<Toffset, N>(offset), pred, pass_thru);
565 template <
typename T,
int NElts = 1,
568 int N,
typename AccessorTy>
570 std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
571 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
572 __ESIMD_NS::simd<T, N * NElts>>
574 __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
575 __ESIMD_NS::simd_mask<N> pred = 1) {
576 #ifdef __ESIMD_FORCE_STATELESS_MEM
577 return lsc_gather<T, NElts, DS, L1H, L2H>(
578 __ESIMD_DNS::accessorToPointer<T>(acc), offsets, pred);
580 __ESIMD_NS::simd<T, N * NElts> PassThru;
581 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
582 return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
583 acc, offsets, pred, PassThru);
587 #ifdef __ESIMD_FORCE_STATELESS_MEM
588 template <
typename T,
int NElts = 1,
591 int N,
typename AccessorTy,
typename Toffset>
592 __ESIMD_API std::enable_if_t<
593 __ESIMD_DNS::is_device_accessor_with_v<
594 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
595 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
596 __ESIMD_NS::simd<T, N * NElts>>
597 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
598 __ESIMD_NS::simd_mask<N> pred = 1) {
599 return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
600 acc, convert<uint64_t>(offsets), pred);
604 template <
typename T,
int NElts = 1,
607 int N,
typename AccessorTy>
609 std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
610 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
611 __ESIMD_NS::simd<T, N * NElts>>
612 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
613 __ESIMD_NS::simd_mask<N> pred = 1) {
614 return lsc_slm_gather<T, NElts, DS>(
615 offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
639 template <
typename T,
int NElts = 1,
642 int N,
typename AccessorTy>
644 std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
645 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
646 __ESIMD_NS::simd<T, N * NElts>>
648 __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
649 __ESIMD_NS::simd_mask<N> pred,
650 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
651 #ifdef __ESIMD_FORCE_STATELESS_MEM
652 return lsc_gather<T, NElts, DS, L1H, L2H>(
653 __ESIMD_DNS::accessorToPointer<T>(acc), offsets, pred, pass_thru);
656 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
657 return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
658 acc, offsets, pred, pass_thru);
662 #ifdef __ESIMD_FORCE_STATELESS_MEM
663 template <
typename T,
int NElts = 1,
666 int N,
typename AccessorTy,
typename Toffset>
667 __ESIMD_API std::enable_if_t<
668 __ESIMD_DNS::is_device_accessor_with_v<
669 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
670 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
671 __ESIMD_NS::simd<T, N * NElts>>
672 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
673 __ESIMD_NS::simd_mask<N> pred,
674 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
675 return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
676 acc, convert<uint64_t>(offsets), pred, pass_thru);
680 template <
typename T,
int NElts = 1,
683 int N,
typename AccessorTy>
684 __ESIMD_API std::enable_if_t<
685 sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>,
686 __ESIMD_NS::simd<T, N * NElts>>
687 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
688 __ESIMD_NS::simd_mask<N> pred,
689 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
690 return lsc_slm_gather<T, NElts, DS>(
691 offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru);
729 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
731 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
732 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
733 __ESIMD_NS::simd<T, NElts>>
736 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
737 __ESIMD_NS::simd<T, NElts> PassThru;
738 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
770 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
772 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
773 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
774 __ESIMD_NS::simd<T, NElts>>
777 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
778 __ESIMD_NS::simd<T, NElts> PassThru;
779 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(
780 p, __ESIMD_NS::simd_mask<1>(1), PassThru);
814 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
816 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
817 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
818 __ESIMD_NS::simd<T, NElts>>
820 __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
822 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
823 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
858 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
861 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
862 __ESIMD_API std::enable_if_t<
863 __ESIMD_DNS::is_device_accessor_with_v<
864 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
865 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
866 __ESIMD_NS::simd<T, NElts>>
868 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
870 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
871 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
875 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
878 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
879 __ESIMD_API std::enable_if_t<
880 __ESIMD_DNS::is_local_accessor_with_v<
881 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
882 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
883 __ESIMD_NS::simd<T, NElts>>
885 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
886 return lsc_slm_block_load<T, NElts, DS>(
887 offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, flags);
917 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
920 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
921 __ESIMD_API std::enable_if_t<
922 __ESIMD_DNS::is_device_accessor_with_v<
923 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
924 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
925 __ESIMD_NS::simd<T, NElts>>
928 return lsc_block_load<T, NElts, DS, L1H, L2H>(
929 acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
932 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
935 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
936 __ESIMD_API std::enable_if_t<
937 __ESIMD_DNS::is_local_accessor_with_v<
938 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
939 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
940 __ESIMD_NS::simd<T, NElts>>
942 return lsc_block_load<T, NElts, DS, L1H, L2H>(
943 acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
978 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
981 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
982 __ESIMD_API std::enable_if_t<
983 __ESIMD_DNS::is_device_accessor_with_v<
984 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
985 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
986 __ESIMD_NS::simd<T, NElts>>
988 __ESIMD_NS::simd_mask<1> pred,
989 __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
991 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
992 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
996 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
999 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1000 __ESIMD_API std::enable_if_t<
1001 __ESIMD_DNS::is_local_accessor_with_v<
1002 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1003 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1004 __ESIMD_NS::simd<T, NElts>>
1006 __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT flags = FlagsT{}) {
1007 return lsc_slm_block_load<T, NElts, DS>(
1008 offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru, flags);
1027 template <
typename T,
int NElts = 1,
1030 int N,
typename Toffset>
1031 __ESIMD_API
void lsc_prefetch(
const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1032 __ESIMD_NS::simd_mask<N> pred = 1) {
1033 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1034 __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, offsets, pred);
1037 template <
typename T,
int NElts = 1,
1040 int N,
typename OffsetObjT,
typename RegionTy>
1042 lsc_prefetch(
const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1043 __ESIMD_NS::simd_mask<N> pred = 1) {
1044 lsc_prefetch<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
1047 template <
typename T,
int NElts = 1,
1050 int N,
typename Toffset>
1051 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>>
1052 lsc_prefetch(
const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
1053 lsc_prefetch<T, NElts, DS, L1H, L2H, N>(
1054 p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
1082 template <
typename T,
int NElts = 1,
1085 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1086 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1088 __ESIMD_NS::simd_mask<1> Mask = 1;
1090 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1091 __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, 0, Mask);
1111 template <
typename T,
int NElts = 1,
1114 int N,
typename AccessorTy>
1115 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1116 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
1118 __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1119 __ESIMD_NS::simd_mask<N> pred = 1) {
1120 #ifdef __ESIMD_FORCE_STATELESS_MEM
1121 lsc_prefetch<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1124 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1125 __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offsets, pred);
1129 #ifdef __ESIMD_FORCE_STATELESS_MEM
1130 template <
typename T,
int NElts = 1,
1133 int N,
typename AccessorTy,
typename Toffset>
1134 __ESIMD_API std::enable_if_t<
1135 __ESIMD_DNS::is_device_accessor_with_v<
1136 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1137 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1138 lsc_prefetch(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1139 __ESIMD_NS::simd_mask<N> pred = 1) {
1140 lsc_prefetch<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1141 acc, convert<uint64_t>(offsets), pred);
1172 template <
typename T,
int NElts = 1,
1175 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag,
1176 typename AccessorTy>
1177 __ESIMD_API std::enable_if_t<
1178 __ESIMD_DNS::is_device_accessor_with_v<
1179 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1180 __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1182 FlagsT flags = FlagsT{}) {
1183 #ifdef __ESIMD_FORCE_STATELESS_MEM
1184 lsc_prefetch<T, NElts, DS, L1H, L2H>(
1185 __ESIMD_DNS::accessorToPointer<T>(acc, offset), flags);
1187 __ESIMD_NS::simd_mask<1> Mask = 1;
1189 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1190 __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offset, Mask);
1208 template <
typename T,
int NElts = 1,
1211 __ESIMD_NS::simd<T, N * NElts> vals,
1212 __ESIMD_NS::simd_mask<N> pred = 1) {
1213 __ESIMD_DNS::slm_scatter_impl<T, NElts, DS>(offsets, vals, pred);
1228 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1229 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1231 __ESIMD_NS::simd<T, NElts> vals,
1232 FlagsT flags = FlagsT{}) {
1235 FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
1236 __ESIMD_NS::simd_mask<1> pred = 1;
1237 __ESIMD_NS::slm_block_store<T, NElts>(offset, vals, pred, Props);
1257 template <
typename T,
int NElts = 1,
1260 int N,
typename Toffset>
1261 __ESIMD_API
void lsc_scatter(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1262 __ESIMD_NS::simd<T, N * NElts> vals,
1263 __ESIMD_NS::simd_mask<N> pred = 1) {
1264 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1265 __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT, N, Toffset>(
1266 p, offsets, vals, pred);
1269 template <
typename T,
int NElts = 1,
1272 int N,
typename OffsetObjT,
typename RegionTy>
1274 lsc_scatter(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1275 __ESIMD_NS::simd<T, N * NElts> vals,
1276 __ESIMD_NS::simd_mask<N> pred = 1) {
1277 lsc_scatter<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), vals, pred);
1280 template <
typename T,
int NElts = 1,
1283 int N,
typename Toffset>
1284 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1>
1285 lsc_scatter(T *p, Toffset offset, __ESIMD_NS::simd<T, N * NElts> vals,
1286 __ESIMD_NS::simd_mask<N> pred = 1) {
1287 lsc_scatter<T, NElts, DS, L1H, L2H, N>(
1288 p, __ESIMD_NS::simd<Toffset, N>(offset), vals, pred);
1309 template <
typename T,
int NElts = 1,
1312 int N,
typename AccessorTy>
1313 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1314 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1316 __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1317 __ESIMD_NS::simd<T, N * NElts> vals,
1318 __ESIMD_NS::simd_mask<N> pred = 1) {
1319 #ifdef __ESIMD_FORCE_STATELESS_MEM
1320 lsc_scatter<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1321 offsets, vals, pred);
1323 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1324 __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT>(acc, offsets, vals,
1329 #ifdef __ESIMD_FORCE_STATELESS_MEM
1330 template <
typename T,
int NElts = 1,
1333 int N,
typename AccessorTy,
typename Toffset>
1334 __ESIMD_API std::enable_if_t<
1335 __ESIMD_DNS::is_device_accessor_with_v<
1336 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1337 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1338 lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1339 __ESIMD_NS::simd<T, N * NElts> vals,
1340 __ESIMD_NS::simd_mask<N> pred = 1) {
1341 lsc_scatter<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1342 acc, convert<uint64_t>(offsets), vals, pred);
1346 template <
typename T,
int NElts = 1,
1349 int N,
typename AccessorTy>
1350 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
1351 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1353 __ESIMD_NS::simd<T, N * NElts> vals,
1354 __ESIMD_NS::simd_mask<N> pred = 1) {
1355 lsc_slm_scatter<T, NElts, DS>(
1356 offsets + __ESIMD_DNS::localAccessorToOffset(acc), vals, pred);
1391 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1393 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1394 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1396 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1398 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1399 return __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(p, vals, pred);
1430 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1432 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1433 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1435 lsc_block_store<T, NElts, DS, L1H, L2H>(p, vals, __ESIMD_NS::simd_mask<1>(1),
1473 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1475 typename AccessorTy,
1476 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1477 __ESIMD_API std::enable_if_t<
1478 __ESIMD_DNS::is_device_accessor_with_v<
1479 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1480 __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1482 __ESIMD_NS::simd<T, NElts> vals,
1483 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1485 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1486 __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(acc, offset, vals,
1490 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1492 typename AccessorTy,
1493 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1494 __ESIMD_API std::enable_if_t<
1495 __ESIMD_DNS::is_local_accessor_with_v<
1496 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1497 __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1499 __ESIMD_NS::simd<T, NElts> vals, FlagsT flags = FlagsT{}) {
1500 lsc_slm_block_store<T, NElts, DS>(
1501 offset + __ESIMD_DNS::localAccessorToOffset(acc), vals, flags);
1534 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1536 typename AccessorTy,
1537 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1538 __ESIMD_API std::enable_if_t<
1539 __ESIMD_DNS::is_accessor_with_v<
1540 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1541 __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1543 __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
1544 lsc_block_store<T, NElts, DS, L1H, L2H>(acc, offset, vals,
1545 __ESIMD_NS::simd_mask<1>(1), flags);
1578 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
1579 bool Transposed =
false,
bool Transformed =
false,
1582 T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1583 __ESIMD_API __ESIMD_NS::simd<T, N>
1584 lsc_load_2d(
const T *Ptr,
unsigned SurfaceWidth,
unsigned SurfaceHeight,
1585 unsigned SurfacePitch,
int X,
int Y) {
1586 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1587 return __ESIMD_DNS::load_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1588 Transposed, Transformed, PropertyListT>(
1589 Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
1614 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
1617 T, NBlocks, BlockHeight, BlockWidth,
false,
false>()>
1619 unsigned SurfaceHeight,
unsigned SurfacePitch,
1621 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1622 __ESIMD_DNS::prefetch_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1623 PropertyListT>(Ptr, SurfaceWidth, SurfaceHeight,
1624 SurfacePitch, X, Y);
1651 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
1654 T, 1u, BlockHeight, BlockWidth,
false,
false>()>
1656 unsigned SurfaceHeight,
unsigned SurfacePitch,
1657 int X,
int Y, __ESIMD_NS::simd<T, N> Vals) {
1658 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1659 __ESIMD_DNS::store_2d_impl<T, BlockWidth, BlockHeight, PropertyListT>(
1660 Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y, Vals);
1670 template <
typename T,
int BlockW
idth,
int BlockHeight,
int NBlocks>
1677 payload_data.template select<1, 1>(7) =
1678 ((NBlocks - 1) << 16) | ((BlockHeight - 1) << 8) | (BlockWidth - 1);
1685 : payload_data(other.payload_data) {}
1699 uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X,
1702 payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
1704 payload_data.template select<1, 1>(2) = SurfaceWidth;
1705 payload_data.template select<1, 1>(3) = SurfaceHeight;
1706 payload_data.template select<1, 1>(4) = SurfacePitch;
1707 payload_data.template select<1, 1>(5) = X;
1708 payload_data.template select<1, 1>(6) = Y;
1718 ->payload_data.template bit_cast_view<uint64_t>()[0]));
1727 ->payload_data.template select<1, 1>(2);
1736 ->payload_data.template select<1, 1>(3);
1745 ->payload_data.template select<1, 1>(4);
1754 ->payload_data.template select<1, 1>(5);
1763 ->payload_data.template select<1, 1>(6);
1790 payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
1801 payload_data.template select<1, 1>(2) = SurfaceWidth;
1811 payload_data.template select<1, 1>(3) = SurfaceHeight;
1821 payload_data.template select<1, 1>(4) = SurfacePitch;
1831 payload_data.template select<1, 1>(5) = X;
1841 payload_data.template select<1, 1>(6) = Y;
1846 __ESIMD_NS::simd<uint32_t, 16> get_raw_data() {
return payload_data; }
1847 __ESIMD_NS::simd<uint32_t, 16> payload_data;
1849 template <
typename T1,
int BlockWidth1,
int BlockHeight1,
int NBlocks1,
1852 friend ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T1, N>
lsc_load_2d(
1855 template <
typename T1,
int BlockWidth1,
int BlockHeight1,
int NBlocks1,
1859 __ESIMD_NS::simd<T1, N> Data);
1861 template <
typename T1,
int BlockWidth1,
int BlockHeight1,
int NBlocks1,
1887 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
1888 bool Transposed =
false,
bool Transformed =
false,
1891 T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1894 __ESIMD_DNS::check_lsc_block_2d_restrictions<
1895 T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1896 __ESIMD_DNS::block_2d_op::load>();
1897 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1900 constexpr
int ElemsPerDword = 4 /
sizeof(T);
1901 constexpr
int GRFRowSize = Transposed ? BlockHeight
1902 : Transformed ? BlockWidth * ElemsPerDword
1904 constexpr
int GRFRowPitch = __ESIMD_DNS::getNextPowerOf2<GRFRowSize>();
1905 constexpr
int GRFColSize =
1908 : (Transformed ? (BlockHeight + ElemsPerDword - 1) / ElemsPerDword
1910 constexpr
int GRFBlockSize = GRFRowPitch * GRFColSize;
1911 constexpr
int GRFBlockPitch =
1912 __ESIMD_DNS::roundUpNextMultiple<64 /
sizeof(T), GRFBlockSize>();
1913 constexpr
int ActualN = NBlocks * GRFBlockPitch;
1915 constexpr
int DstBlockElements = GRFColSize * GRFRowSize;
1916 constexpr
int DstElements = DstBlockElements * NBlocks;
1918 constexpr uint32_t GrfBytes = 64;
1919 constexpr uint32_t DstBlockSize =
1920 __ESIMD_DNS::roundUpNextMultiple<DstElements * sizeof(T), GrfBytes>();
1921 constexpr uint32_t DstLength =
1922 (DstBlockSize / GrfBytes) > 31 ? 31 : (DstBlockSize / GrfBytes);
1923 constexpr uint32_t DstLengthMask = DstLength << 20;
1925 static_assert(N == ActualN || N == DstElements,
"Incorrect element count");
1927 constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
1929 constexpr uint32_t base_desc = 0x2000003;
1930 constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
1931 constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
1932 constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
1933 __ESIMD_NS::simd<T, N> oldDst;
1934 constexpr uint32_t exDesc = 0x0;
1935 constexpr uint32_t desc = base_desc | cache_mask | transformMask |
1936 transposeMask | dataSizeMask | DstLengthMask;
1937 constexpr uint8_t execSize = 1;
1938 constexpr uint8_t sfid = 0xF;
1939 constexpr uint8_t numSrc0 = 0x1;
1940 constexpr uint8_t numDst = (N *
sizeof(T)) / 64;
1941 __ESIMD_NS::simd<T, ActualN> Raw =
1942 __ESIMD_NS::raw_send<execSize, sfid, numSrc0, numDst>(
1943 oldDst, payload.get_raw_data(), exDesc, desc);
1945 if constexpr (ActualN == N) {
1951 __ESIMD_NS::simd<T, DstElements> Dst;
1953 for (
auto i = 0; i < NBlocks; i++) {
1955 Dst.template select<DstBlockElements, 1>(i * DstBlockElements);
1957 auto RawBlock = Raw.template select<GRFBlockSize, 1>(i * GRFBlockPitch);
1958 DstBlock = RawBlock.template bit_cast_view<T, GRFColSize, GRFRowPitch>()
1959 .template select<GRFColSize, 1, GRFRowSize, 1>(0, 0)
1960 .template bit_cast_view<T>();
1983 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
1984 bool Transposed =
false,
bool Transformed =
false,
1987 T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1990 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1993 __ESIMD_DNS::check_lsc_block_2d_restrictions<
1994 T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1996 static_assert(!Transposed || !Transformed,
1997 "Transposed and transformed is not supported");
1998 constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
2000 constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2001 constexpr uint32_t base_desc = 0x2000003;
2002 constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
2003 constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
2004 constexpr uint32_t exDesc = 0x0;
2005 constexpr uint32_t desc =
2006 base_desc | cache_mask | transformMask | transposeMask | dataSizeMask;
2007 constexpr uint8_t execSize = 1;
2008 constexpr uint8_t sfid = 0xF;
2009 constexpr uint8_t numDst = (N *
sizeof(T)) / 64;
2010 __ESIMD_NS::raw_send<execSize, sfid, numDst>(payload.get_raw_data(), exDesc,
2029 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
2032 T, NBlocks, BlockHeight, BlockWidth,
false,
false>()>
2033 ESIMD_INLINE SYCL_ESIMD_FUNCTION
void
2035 __ESIMD_NS::simd<T, N> Data) {
2036 __ESIMD_DNS::check_lsc_block_2d_restrictions<
2037 T, BlockWidth, BlockHeight, NBlocks,
false,
false,
2038 __ESIMD_DNS::block_2d_op::store>();
2039 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2043 constexpr uint32_t cache_mask = detail::get_lsc_store_cache_mask<L1H, L2H>()
2045 constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2046 constexpr uint32_t base_desc = 0x2000007;
2048 constexpr uint32_t exDesc = 0x0;
2049 constexpr uint32_t desc = base_desc | cache_mask | dataSizeMask;
2050 constexpr uint8_t execSize = 1;
2051 constexpr uint8_t sfid = 0xF;
2052 constexpr uint8_t numSrc0 = 0x1;
2053 constexpr uint8_t numSrc1 = (N *
sizeof(T)) / 64;
2055 __ESIMD_NS::raw_sends<execSize, sfid, numSrc0, numSrc1>(
2056 payload.get_raw_data(), Data, exDesc, desc);
2066 template <
typename T, __ESIMD_NS::atomic_op Op>
2069 __ESIMD_DNS::to_lsc_atomic_op<Op>();
2070 return static_cast<int>(LSCOp);
2089 __ESIMD_API __ESIMD_NS::simd<T, N>
2091 __ESIMD_NS::simd_mask<N> pred) {
2092 return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, pred);
2111 __ESIMD_API __ESIMD_NS::simd<T, N>
2113 __ESIMD_NS::simd<T, N>
src0,
2114 __ESIMD_NS::simd_mask<N> pred) {
2115 return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets,
src0, pred);
2135 __ESIMD_API __ESIMD_NS::simd<T, N>
2137 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2138 __ESIMD_NS::simd_mask<N> pred) {
2139 return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets,
src0,
src1,
2161 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0,
2162 __ESIMD_NS::simd<T, N>>
2164 __ESIMD_NS::simd_mask<N> pred) {
2165 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2166 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2174 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2175 __ESIMD_DNS::get_num_args<Op>() == 0,
2176 __ESIMD_NS::simd<T, N>>
2178 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2179 p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
2201 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2202 __ESIMD_NS::simd<T, N>>
2204 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd_mask<N> pred) {
2205 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2206 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2207 p, offsets,
src0, pred);
2213 typename OffsetObjT,
typename RegionTy>
2214 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2215 __ESIMD_NS::simd<T, N>>
2217 __ESIMD_NS::simd<T, N>
src0,
2218 __ESIMD_NS::simd_mask<N> pred = 1) {
2219 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(),
src0,
2227 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2228 __ESIMD_DNS::get_num_args<Op>() == 1 &&
2229 ((Op != __ESIMD_NS::atomic_op::store &&
2230 Op != __ESIMD_NS::atomic_op::xchg) ||
2232 __ESIMD_NS::simd<T, N>>
2234 __ESIMD_NS::simd_mask<N> pred = 1) {
2235 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2236 p, __ESIMD_NS::simd<Toffset, N>(offset),
src0, pred);
2259 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2260 __ESIMD_NS::simd<T, N>>
2262 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2263 __ESIMD_NS::simd_mask<N> pred) {
2264 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2265 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2272 typename OffsetObjT,
typename RegionTy>
2273 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2274 __ESIMD_NS::simd<T, N>>
2276 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2277 __ESIMD_NS::simd_mask<N> pred = 1) {
2278 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(),
src0,
2286 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2287 __ESIMD_DNS::get_num_args<Op>() == 2,
2288 __ESIMD_NS::simd<T, N>>
2290 __ESIMD_NS::simd<T, N>
src1,
2291 __ESIMD_NS::simd_mask<N> pred = 1) {
2292 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2293 p, __ESIMD_NS::simd<Toffset, N>(offset),
src0,
src1, pred);
2316 typename AccessorTy,
typename Toffset>
2317 __ESIMD_API std::enable_if_t<
2318 __ESIMD_DNS::is_device_accessor_with_v<
2319 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
2320 (Op == __ESIMD_NS::atomic_op::load ||
2321 __ESIMD_DNS::is_device_accessor_with_v<
2322 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>),
2323 __ESIMD_NS::simd<T, N>>
2325 __ESIMD_NS::simd_mask<N> pred) {
2326 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2327 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2328 acc, offsets, pred);
2349 typename AccessorTy>
2350 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2351 __ESIMD_NS::simd<T, N>>
2353 __ESIMD_NS::simd_mask<N> pred) {
2354 return lsc_slm_atomic_update<Op, T, N, DS>(
2355 offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
2379 typename AccessorTy,
typename Toffset>
2380 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2381 __ESIMD_NS::simd<T, N>>
2383 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd_mask<N> pred) {
2384 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2385 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2386 acc, offsets,
src0, pred);
2408 typename AccessorTy>
2409 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2410 __ESIMD_NS::simd<T, N>>
2412 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd_mask<N> pred) {
2413 return lsc_slm_atomic_update<Op, T, N, DS>(
2414 offsets + __ESIMD_DNS::localAccessorToOffset(acc),
src0, pred);
2439 typename AccessorTy,
typename Toffset>
2440 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2441 __ESIMD_NS::simd<T, N>>
2443 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2444 __ESIMD_NS::simd_mask<N> pred) {
2445 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2446 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2470 typename AccessorTy>
2471 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2472 __ESIMD_NS::simd<T, N>>
2474 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2475 __ESIMD_NS::simd_mask<N> pred) {
2476 return lsc_slm_atomic_update<Op, T, N, DS>(
2477 offsets + __ESIMD_DNS::localAccessorToOffset(acc),
src0,
src1, pred);
2490 #ifdef __SYCL_DEVICE_ONLY__
2491 return __spirv_BuiltInGlobalHWThreadIDINTEL();
2498 #ifdef __SYCL_DEVICE_ONLY__
2499 return __spirv_BuiltInSubDeviceIDINTEL();
2512 return __esimd_named_barrier_allocate(NbarCount);
2522 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2523 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2524 __ESIMD_DNS::get_num_args<Op>() == 0,
2527 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2533 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0,
simd<T, N>>
2536 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2540 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2541 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2542 __ESIMD_DNS::get_num_args<Op>() == 0,
2545 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2550 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2551 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2552 __ESIMD_DNS::get_num_args<Op>() == 1,
2556 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2557 p, offset,
src0, mask);
2562 __ESIMD_API __ESIMD_API
2563 std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
simd<T, N>>
2566 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2567 p, offsets,
src0, mask);
2570 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2571 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2572 __ESIMD_DNS::get_num_args<Op>() == 1,
2575 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2576 p, offset,
src0, mask);
2580 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2581 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2582 __ESIMD_DNS::get_num_args<Op>() == 2,
2589 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2595 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
simd<T, N>>
2598 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2602 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2603 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2604 __ESIMD_DNS::get_num_args<Op>() == 2,
2605 __ESIMD_NS::simd<T, N>>
2608 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2613 typename AccessorTy>
2614 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2615 __ESIMD_DNS::get_num_args<Op>() == 0 &&
2616 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2619 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2624 typename RegionTy,
typename AccessorTy>
2625 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0 &&
2626 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2630 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2631 acc, offsets, mask);
2635 typename AccessorTy>
2636 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2637 __ESIMD_DNS::get_num_args<Op>() == 0 &&
2638 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2641 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2647 typename AccessorTy>
2648 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2649 __ESIMD_DNS::get_num_args<Op>() == 1 &&
2650 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2654 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2655 acc, offset,
src0, mask);
2659 typename RegionTy,
typename AccessorTy>
2660 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
2661 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2665 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2666 acc, offsets,
src0, mask);
2670 typename AccessorTy>
2671 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2672 __ESIMD_DNS::get_num_args<Op>() == 1 &&
2673 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2677 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2678 acc, offset,
src0, mask);
2683 typename AccessorTy>
2684 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2685 __ESIMD_DNS::get_num_args<Op>() == 2 &&
2686 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2693 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2698 typename RegionTy,
typename AccessorTy>
2699 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
2700 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2704 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2709 typename AccessorTy>
2710 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2711 __ESIMD_DNS::get_num_args<Op>() == 2 &&
2712 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2713 __ESIMD_NS::simd<T, N>>
2716 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
This class represents a reference to a sub-region of a base simd object.
The main simd vector class.
Container class to hold parameters for load2d/store2d functions
T * get_data_pointer() const
Get a surface base address
uint32_t get_surface_pitch() const
Get surface pitch
config_2d_mem_access & set_x(int32_t X)
Sets top left corner X coordinate of the block
constexpr int32_t get_number_of_blocks() const
Get number of blocks
constexpr int32_t get_width() const
Get width of the block
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_store_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload, sycl::ext::intel::esimd::simd< T1, N > Data)
config_2d_mem_access & set_surface_width(uint32_t SurfaceWidth)
Sets surface width
uint32_t get_surface_height() const
Get surface height
config_2d_mem_access()
Default constructor
int32_t get_x() const
Get top left corner X coordinate of the block
config_2d_mem_access & set_data_pointer(T *Ptr)
Sets surface base address
config_2d_mem_access & set_surface_height(uint32_t SurfaceHeight)
Sets surface height
config_2d_mem_access(const config_2d_mem_access &other)
Copy constructor
config_2d_mem_access & set_y(int32_t Y)
Sets top left corner Y coordinate of the block
config_2d_mem_access(const T *Ptr, uint32_t SurfaceWidth, uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X, int32_t Y)
Constructor
config_2d_mem_access & set_surface_pitch(uint32_t SurfacePitch)
Sets surface pitch
constexpr int32_t get_height() const
Get height of the block
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd< T1, N > lsc_load_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
uint32_t get_surface_width() const
Get surface width
int32_t get_y() const
Get top left corner Y coordinate of the block
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
__ESIMD_DNS::lsc_data_size lsc_data_size
sycl::ext::intel::esimd::cache_hint cache_hint
L1 or L2 cache hint kinds.
atomic_op
Represents an atomic operation.
__ESIMD_API int32_t get_subdevice_id()
Get subdevice ID.
__ESIMD_API int32_t get_hw_thread_id()
Get HW Thread ID.
__ESIMD_API SZ simd< T, SZ > src1
__ESIMD_API void lsc_slm_block_store(uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{})
Transposed SLM scatter with 1 channel.
__ESIMD_API void lsc_prefetch(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer prefetch gather.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, sycl::ext::intel::esimd::simd< T, N > > lsc_atomic_update(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
USM pointer atomic.
__ESIMD_API void lsc_slm_scatter(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM scatter.
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block load.
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_slm_atomic_update(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
SLM atomic.
__ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, sycl::ext::intel::esimd::simd< T, N > Vals)
2D USM pointer block store.
atomic_op
LSC atomic operation codes.
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > lsc_slm_block_load(uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
Transposed SLM gather with 1 channel.
__ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block prefetch.
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_slm_gather(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM gather.
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > lsc_block_load(const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed gather with 1 channel.
__ESIMD_API void lsc_scatter(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer scatter.
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > lsc_block_store(T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed scatter with 1 channel.
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_gather(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer gather.
__ESIMD_API std::enable_if_t<(sizeof(T) *N >=2)> wait(sycl::ext::intel::esimd::simd< T, N > value)
Create explicit scoreboard dependency to avoid device code motion across this call and preserve the v...
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > atomic_update(AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); ...
__ESIMD_API void split_barrier()
Generic work-group split barrier.
__ESIMD_API std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > prefetch(AccessorT acc, PropertyListT props={})
template <typename T, int VS = 1, typename AccessorT, typename PropertyListT = empty_properties_t> vo...
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_send(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw send.
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_sends(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, sycl::ext::intel::esimd::simd< T3, n3 > msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw sends.
typename make_L1_L2_alignment_properties< L1H, L2H, Alignment >::type make_L1_L2_alignment_properties_t
uint32_t DeviceAccessorOffsetT
constexpr alignment_key::value_t< K > alignment
cache_hint
L1, L2 or L3 cache hints.
constexpr uint32_t get_lsc_store_cache_mask()
constexpr int get_lsc_block_2d_data_size()
constexpr int lsc_to_internal_atomic_op()
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, N > lsc_format_input(sycl::ext::intel::esimd::simd< T, N > Vals)
constexpr uint32_t get_lsc_load_cache_mask()
ESIMD_INLINE sycl::ext::intel::esimd::simd< T, N > lsc_format_ret(sycl::ext::intel::esimd::simd< T1, N > Vals)
constexpr uint32_t get_lsc_data_size()
__ESIMD_API uint8_t named_barrier_allocate()
Allocate additional named barriers for a kernel Available only on PVC.
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.