20 inline namespace _V1 {
21 namespace ext::intel {
22 namespace experimental::esimd {
30 __esimd_sbarrier(flag);
35 __esimd_sbarrier(flag);
67 template <
typename T1,
int n1,
typename T2,
int n2,
typename T3,
int n3,
69 __ESIMD_API __ESIMD_NS::simd<T1, n1>
70 raw_sends(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
71 __ESIMD_NS::simd<T3, n3> msgSrc1, uint32_t exDesc, uint32_t msgDesc,
72 uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
73 uint8_t numDst, uint8_t isEOT = 0, uint8_t isSendc = 0,
74 __ESIMD_NS::simd_mask<N> mask = 1) {
75 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
76 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send rspVar");
77 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
78 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msgSrc0");
79 constexpr
unsigned _Width3 = n3 *
sizeof(T3);
80 static_assert(_Width3 % 32 == 0,
"Invalid size for raw send msgSrc1");
82 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
83 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
84 using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
86 uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
87 return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, N>(
88 modifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid, exDesc,
89 msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
111 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
112 uint8_t numDst, uint8_t isEOT = 0, uint8_t isSendc = 0,
typename T1,
113 int n1,
typename T2,
int n2,
typename T3,
int n3>
116 __ESIMD_NS::
simd<T1, n1> msgDst, __ESIMD_NS::
simd<T2, n2> msgSrc0,
117 __ESIMD_NS::
simd<T3, n3> msgSrc1, uint32_t exDesc, uint32_t msgDesc,
118 __ESIMD_NS::
simd_mask<execSize> mask = 1) {
119 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
120 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send rspVar");
121 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
122 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msgSrc0");
123 constexpr
unsigned _Width3 = n3 *
sizeof(T3);
124 static_assert(_Width3 % 32 == 0,
"Invalid size for raw send msgSrc1");
126 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
127 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
128 using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
130 constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
132 return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, execSize>(
133 modifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid, exDesc,
134 msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
158 template <
typename T1,
int n1,
typename T2,
int n2,
int N = 16>
159 __ESIMD_API __ESIMD_NS::simd<T1, n1>
160 raw_send(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
161 uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
162 uint8_t numSrc0, uint8_t numDst, uint8_t isEOT = 0,
163 uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
164 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
165 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send rspVar");
166 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
167 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msgSrc0");
169 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
170 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
172 uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
173 return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, N>(
174 modifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc, msgDesc,
175 msgSrc0.data(), msgDst.data());
195 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst,
196 uint8_t isEOT = 0, uint8_t isSendc = 0,
typename T1,
int n1,
200 __ESIMD_NS::
simd<T1, n1> msgDst, __ESIMD_NS::
simd<T2, n2> msgSrc0,
201 uint32_t exDesc, uint32_t msgDesc,
202 __ESIMD_NS::
simd_mask<execSize> mask = 1) {
203 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
204 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send rspVar");
205 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
206 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msgSrc0");
208 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
209 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
211 constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
212 return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, execSize>(
213 modifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc, msgDesc,
214 msgSrc0.data(), msgDst.data());
237 template <
typename T1,
int n1,
typename T2,
int n2,
int N = 16>
239 raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0, __ESIMD_NS::simd<T2, n2> msgSrc1,
240 uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
241 uint8_t numSrc0, uint8_t numSrc1, uint8_t isEOT = 0,
242 uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
243 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
244 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send msgSrc0");
245 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
246 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msgSrc1");
248 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
249 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
251 uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
252 __esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, N>(
253 modifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc, msgDesc,
254 msgSrc0.data(), msgSrc1.data());
273 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
274 uint8_t isEOT = 0, uint8_t isSendc = 0,
typename T1,
int n1,
279 __ESIMD_NS::
simd<T2, n2> msgSrc1, uint32_t exDesc,
280 uint32_t msgDesc, __ESIMD_NS::
simd_mask<execSize> mask = 1) {
281 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
282 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send msgSrc0");
283 constexpr
unsigned _Width2 = n2 *
sizeof(T2);
284 static_assert(_Width2 % 32 == 0,
"Invalid size for raw send msgSrc1");
286 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
287 using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
289 constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
290 __esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, execSize>(
291 modifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc, msgDesc,
292 msgSrc0.data(), msgSrc1.data());
313 template <
typename T1,
int n1,
int N = 16>
315 raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
316 uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0,
317 uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
318 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
319 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send msgSrc0");
320 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
321 uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
322 __esimd_raw_send2_noresult<ElemT1, n1, N>(modifier, execSize, mask.data(),
323 numSrc0, sfid, exDesc, msgDesc,
342 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0,
343 uint8_t isSendc = 0,
typename T1,
int n1>
347 uint32_t msgDesc, __ESIMD_NS::
simd_mask<execSize> mask = 1) {
348 constexpr
unsigned _Width1 = n1 *
sizeof(T1);
349 static_assert(_Width1 % 32 == 0,
"Invalid size for raw send msgSrc0");
350 using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
351 constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
352 __esimd_raw_send2_noresult<ElemT1, n1, execSize>(
353 modifier, execSize, mask.data(), numSrc0, sfid, exDesc, msgDesc,
370 __esimd_nbarrier(0 ,
id, 0 );
378 __esimd_nbarrier_init(NbarCount);
394 uint8_t producer_consumer_mode,
395 uint32_t num_producers,
396 uint32_t num_consumers) {
399 #ifdef __ESIMD_USE_NEW_NAMED_BARRIER_INTRIN
400 __esimd_nbarrier_arrive(barrier_id, producer_consumer_mode, num_producers,
403 constexpr uint32_t gateway = 3;
404 constexpr uint32_t
barrier = 4;
405 constexpr uint32_t descriptor = 1 << 25 |
409 __ESIMD_DNS::vector_type_t<uint32_t, 8> payload = 0;
410 payload[2] = (num_consumers & 0xff) << 24 | (num_producers & 0xff) << 16 |
411 producer_consumer_mode << 14 | (barrier_id & 0b11111) << 0;
412 __esimd_raw_send_nbarrier_signal<uint32_t, 8>(
413 0 , gateway, descriptor, payload, 1 );
420 template <
typename T,
int N>
421 __ESIMD_API std::enable_if_t<(
sizeof(T) * N >= 2)>
422 wait(__ESIMD_NS::simd<T, N> value) {
423 #ifdef __SYCL_DEVICE_ONLY__
424 uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
432 template <
typename T,
typename RegionT>
433 __ESIMD_API std::enable_if_t<
435 wait(__ESIMD_NS::simd_view<T, RegionT> value) {
436 #ifdef __SYCL_DEVICE_ONLY__
437 uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
452 template <
typename T,
int NBlocks,
int Height,
int Width,
bool Transposed,
456 Transposed, Transformed>();
460 template <
typename RT,
typename T,
int N>
461 ESIMD_INLINE __ESIMD_NS::simd<RT, N>
463 return __ESIMD_DNS::lsc_format_input<RT, T, N>(Vals);
467 template <
typename T,
typename T1,
int N>
468 ESIMD_INLINE __ESIMD_NS::simd<T, N>
470 return __ESIMD_DNS::lsc_format_ret<T, T1, N>(Vals);
484 static_assert(
true,
"Unsupported data type.");
488 template <cache_h
int L1H = cache_h
int::none, cache_h
int L2H = cache_h
int::none>
490 if constexpr (L1H == cache_hint::read_invalidate &&
491 L2H == cache_hint::cached) {
494 if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
497 if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
500 if constexpr (L1H == cache_hint::cached && L2H == cache_hint::cached) {
503 if constexpr (L1H == cache_hint::cached && L2H == cache_hint::uncached) {
506 if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
509 if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
515 template <cache_h
int L1H = cache_h
int::none, cache_h
int L2H = cache_h
int::none>
517 if constexpr (L1H == cache_hint::write_back && L2H == cache_hint::cached) {
520 if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
523 if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
526 if constexpr (L1H == cache_hint::write_through && L2H == cache_hint::cached) {
529 if constexpr (L1H == cache_hint::write_through &&
530 L2H == cache_hint::uncached) {
533 if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
536 if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
559 template <
typename T,
int NElts = 1,
561 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
563 __ESIMD_NS::simd_mask<N> pred = 1) {
564 __ESIMD_NS::simd<T, N * NElts> pass_thru;
565 return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
585 template <
typename T,
int NElts = 1,
587 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
589 __ESIMD_NS::simd_mask<N> pred,
590 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
591 return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
609 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
610 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
611 __ESIMD_API __ESIMD_NS::simd<T, NElts>
613 FlagsT flags = FlagsT{}) {
615 FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
616 return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, Props);
636 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
637 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
638 __ESIMD_API __ESIMD_NS::simd<T, NElts>
640 __ESIMD_NS::simd<T, NElts> pass_thru) {
642 FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
643 return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, pass_thru, Props);
664 template <
typename T,
int NElts = 1,
667 int N,
typename Toffset>
668 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
670 __ESIMD_NS::simd_mask<N> pred = 1) {
671 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
672 __ESIMD_NS::simd<T, N * NElts> PassThru;
673 return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
697 template <
typename T,
int NElts = 1,
700 int N,
typename Toffset>
701 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
703 __ESIMD_NS::simd_mask<N> pred,
704 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
705 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
706 return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
710 template <
typename T,
int NElts = 1,
713 int N,
typename OffsetObjT,
typename RegionTy>
714 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
715 lsc_gather(
const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
716 __ESIMD_NS::simd_mask<N> pred = 1) {
717 return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
720 template <
typename T,
int NElts = 1,
723 int N,
typename OffsetObjT,
typename RegionTy>
724 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
725 lsc_gather(
const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
726 __ESIMD_NS::simd_mask<N> pred,
727 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
728 return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred,
732 template <
typename T,
int NElts = 1,
735 int N,
typename Toffset>
736 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
737 __ESIMD_NS::simd<T, N * NElts>>
738 lsc_gather(
const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
739 return lsc_gather<T, NElts, DS, L1H, L2H, N>(
740 p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
743 template <
typename T,
int NElts = 1,
746 int N,
typename Toffset>
747 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
748 __ESIMD_NS::simd<T, N * NElts>>
749 lsc_gather(
const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred,
750 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
751 return lsc_gather<T, NElts, DS, L1H, L2H, N>(
752 p, __ESIMD_NS::simd<Toffset, N>(offset), pred, pass_thru);
774 template <
typename T,
int NElts = 1,
777 int N,
typename AccessorTy>
779 std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
780 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
781 __ESIMD_NS::simd<T, N * NElts>>
783 __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
784 __ESIMD_NS::simd_mask<N> pred = 1) {
785 #ifdef __ESIMD_FORCE_STATELESS_MEM
786 return lsc_gather<T, NElts, DS, L1H, L2H>(
787 __ESIMD_DNS::accessorToPointer<T>(acc), offsets, pred);
789 __ESIMD_NS::simd<T, N * NElts> PassThru;
790 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
791 return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
792 acc, offsets, pred, PassThru);
796 #ifdef __ESIMD_FORCE_STATELESS_MEM
797 template <
typename T,
int NElts = 1,
800 int N,
typename AccessorTy,
typename Toffset>
801 __ESIMD_API std::enable_if_t<
802 __ESIMD_DNS::is_device_accessor_with_v<
803 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
804 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
805 __ESIMD_NS::simd<T, N * NElts>>
806 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
807 __ESIMD_NS::simd_mask<N> pred = 1) {
808 return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
809 acc, convert<uint64_t>(offsets), pred);
813 template <
typename T,
int NElts = 1,
816 int N,
typename AccessorTy>
818 std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
819 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
820 __ESIMD_NS::simd<T, N * NElts>>
821 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
822 __ESIMD_NS::simd_mask<N> pred = 1) {
823 return lsc_slm_gather<T, NElts, DS>(
824 offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
848 template <
typename T,
int NElts = 1,
851 int N,
typename AccessorTy>
853 std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
854 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
855 __ESIMD_NS::simd<T, N * NElts>>
857 __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
858 __ESIMD_NS::simd_mask<N> pred,
859 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
860 #ifdef __ESIMD_FORCE_STATELESS_MEM
861 return lsc_gather<T, NElts, DS, L1H, L2H>(
862 reinterpret_cast<T *
>(__ESIMD_DNS::accessorToPointer<T>(acc)), offsets,
866 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
867 return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
868 acc, offsets, pred, pass_thru);
872 #ifdef __ESIMD_FORCE_STATELESS_MEM
873 template <
typename T,
int NElts = 1,
876 int N,
typename AccessorTy,
typename Toffset>
877 __ESIMD_API std::enable_if_t<
878 __ESIMD_DNS::is_device_accessor_with_v<
879 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
880 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
881 __ESIMD_NS::simd<T, N * NElts>>
882 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
883 __ESIMD_NS::simd_mask<N> pred,
884 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
885 return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
886 acc, convert<uint64_t>(offsets), pred, pass_thru);
890 template <
typename T,
int NElts = 1,
893 int N,
typename AccessorTy>
894 __ESIMD_API std::enable_if_t<
895 sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>,
896 __ESIMD_NS::simd<T, N * NElts>>
897 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
898 __ESIMD_NS::simd_mask<N> pred,
899 __ESIMD_NS::simd<T, N * NElts> pass_thru) {
900 return lsc_slm_gather<T, NElts, DS>(
901 offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru);
939 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
941 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
942 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
943 __ESIMD_NS::simd<T, NElts>>
946 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
947 __ESIMD_NS::simd<T, NElts> PassThru;
948 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
980 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
982 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
983 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
984 __ESIMD_NS::simd<T, NElts>>
987 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
988 __ESIMD_NS::simd<T, NElts> PassThru;
989 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(
990 p, __ESIMD_NS::simd_mask<1>(1), PassThru);
1024 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1026 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1027 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1028 __ESIMD_NS::simd<T, NElts>>
1030 __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
1032 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1033 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
1068 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1070 typename AccessorTy,
1071 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1072 __ESIMD_API std::enable_if_t<
1073 __ESIMD_DNS::is_device_accessor_with_v<
1074 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1075 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1076 __ESIMD_NS::simd<T, NElts>>
1078 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
1080 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1081 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
1085 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1087 typename AccessorTy,
1088 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1089 __ESIMD_API std::enable_if_t<
1090 __ESIMD_DNS::is_local_accessor_with_v<
1091 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1092 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1093 __ESIMD_NS::simd<T, NElts>>
1095 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
1096 return lsc_slm_block_load<T, NElts, DS>(
1097 offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, flags);
1127 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1129 typename AccessorTy,
1130 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1131 __ESIMD_API std::enable_if_t<
1132 __ESIMD_DNS::is_device_accessor_with_v<
1133 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1134 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1135 __ESIMD_NS::simd<T, NElts>>
1138 return lsc_block_load<T, NElts, DS, L1H, L2H>(
1139 acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
1142 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1144 typename AccessorTy,
1145 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1146 __ESIMD_API std::enable_if_t<
1147 __ESIMD_DNS::is_local_accessor_with_v<
1148 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1149 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1150 __ESIMD_NS::simd<T, NElts>>
1152 return lsc_block_load<T, NElts, DS, L1H, L2H>(
1153 acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
1188 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1190 typename AccessorTy,
1191 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1192 __ESIMD_API std::enable_if_t<
1193 __ESIMD_DNS::is_device_accessor_with_v<
1194 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1195 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1196 __ESIMD_NS::simd<T, NElts>>
1198 __ESIMD_NS::simd_mask<1> pred,
1199 __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
1201 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1202 return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
1206 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1208 typename AccessorTy,
1209 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1210 __ESIMD_API std::enable_if_t<
1211 __ESIMD_DNS::is_local_accessor_with_v<
1212 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1213 __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1214 __ESIMD_NS::simd<T, NElts>>
1216 __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT flags = FlagsT{}) {
1217 return lsc_slm_block_load<T, NElts, DS>(
1218 offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru, flags);
1237 template <
typename T,
int NElts = 1,
1240 int N,
typename Toffset>
1241 __ESIMD_API
void lsc_prefetch(
const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1242 __ESIMD_NS::simd_mask<N> pred = 1) {
1243 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1244 __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, offsets, pred);
1247 template <
typename T,
int NElts = 1,
1250 int N,
typename OffsetObjT,
typename RegionTy>
1252 lsc_prefetch(
const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1253 __ESIMD_NS::simd_mask<N> pred = 1) {
1254 lsc_prefetch<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
1257 template <
typename T,
int NElts = 1,
1260 int N,
typename Toffset>
1261 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>>
1262 lsc_prefetch(
const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
1263 lsc_prefetch<T, NElts, DS, L1H, L2H, N>(
1264 p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
1292 template <
typename T,
int NElts = 1,
1295 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1296 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1298 __ESIMD_NS::simd_mask<1> Mask = 1;
1300 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1301 __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, 0, Mask);
1321 template <
typename T,
int NElts = 1,
1324 int N,
typename AccessorTy>
1325 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1326 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
1328 __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1329 __ESIMD_NS::simd_mask<N> pred = 1) {
1330 #ifdef __ESIMD_FORCE_STATELESS_MEM
1331 lsc_prefetch<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1334 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1335 __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offsets, pred);
1339 #ifdef __ESIMD_FORCE_STATELESS_MEM
1340 template <
typename T,
int NElts = 1,
1343 int N,
typename AccessorTy,
typename Toffset>
1344 __ESIMD_API std::enable_if_t<
1345 __ESIMD_DNS::is_device_accessor_with_v<
1346 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1347 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1348 lsc_prefetch(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1349 __ESIMD_NS::simd_mask<N> pred = 1) {
1350 lsc_prefetch<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1351 acc, convert<uint64_t>(offsets), pred);
1382 template <
typename T,
int NElts = 1,
1385 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag,
1386 typename AccessorTy>
1387 __ESIMD_API std::enable_if_t<
1388 __ESIMD_DNS::is_device_accessor_with_v<
1389 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1390 __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1392 FlagsT flags = FlagsT{}) {
1393 #ifdef __ESIMD_FORCE_STATELESS_MEM
1394 lsc_prefetch<T, NElts, DS, L1H, L2H>(
1395 __ESIMD_DNS::accessorToPointer<T>(acc, offset), flags);
1397 __ESIMD_NS::simd_mask<1> Mask = 1;
1399 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1400 __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offset, Mask);
1418 template <
typename T,
int NElts = 1,
1421 __ESIMD_NS::simd<T, N * NElts> vals,
1422 __ESIMD_NS::simd_mask<N> pred = 1) {
1423 __ESIMD_DNS::slm_scatter_impl<T, NElts, DS>(offsets, vals, pred);
1438 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1439 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1441 __ESIMD_NS::simd<T, NElts> vals,
1442 FlagsT flags = FlagsT{}) {
1445 FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
1446 __ESIMD_NS::simd_mask<1> pred = 1;
1447 __ESIMD_NS::slm_block_store<T, NElts>(offset, vals, pred, Props);
1467 template <
typename T,
int NElts = 1,
1470 int N,
typename Toffset>
1471 __ESIMD_API
void lsc_scatter(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1472 __ESIMD_NS::simd<T, N * NElts> vals,
1473 __ESIMD_NS::simd_mask<N> pred = 1) {
1474 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1475 __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT, N, Toffset>(
1476 p, offsets, vals, pred);
1479 template <
typename T,
int NElts = 1,
1482 int N,
typename OffsetObjT,
typename RegionTy>
1484 lsc_scatter(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1485 __ESIMD_NS::simd<T, N * NElts> vals,
1486 __ESIMD_NS::simd_mask<N> pred = 1) {
1487 lsc_scatter<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), vals, pred);
1490 template <
typename T,
int NElts = 1,
1493 int N,
typename Toffset>
1494 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1>
1495 lsc_scatter(T *p, Toffset offset, __ESIMD_NS::simd<T, N * NElts> vals,
1496 __ESIMD_NS::simd_mask<N> pred = 1) {
1497 lsc_scatter<T, NElts, DS, L1H, L2H, N>(
1498 p, __ESIMD_NS::simd<Toffset, N>(offset), vals, pred);
1519 template <
typename T,
int NElts = 1,
1522 int N,
typename AccessorTy>
1523 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1524 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1526 __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1527 __ESIMD_NS::simd<T, N * NElts> vals,
1528 __ESIMD_NS::simd_mask<N> pred = 1) {
1529 #ifdef __ESIMD_FORCE_STATELESS_MEM
1530 lsc_scatter<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1531 offsets, vals, pred);
1533 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1534 __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT>(acc, offsets, vals,
1539 #ifdef __ESIMD_FORCE_STATELESS_MEM
1540 template <
typename T,
int NElts = 1,
1543 int N,
typename AccessorTy,
typename Toffset>
1544 __ESIMD_API std::enable_if_t<
1545 __ESIMD_DNS::is_device_accessor_with_v<
1546 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1547 std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1548 lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1549 __ESIMD_NS::simd<T, N * NElts> vals,
1550 __ESIMD_NS::simd_mask<N> pred = 1) {
1551 lsc_scatter<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1552 acc, convert<uint64_t>(offsets), vals, pred);
1556 template <
typename T,
int NElts = 1,
1559 int N,
typename AccessorTy>
1560 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
1561 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1563 __ESIMD_NS::simd<T, N * NElts> vals,
1564 __ESIMD_NS::simd_mask<N> pred = 1) {
1565 lsc_slm_scatter<T, NElts, DS>(
1566 offsets + __ESIMD_DNS::localAccessorToOffset(acc), vals, pred);
1601 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1603 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1604 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1606 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1608 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1609 return __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(p, vals, pred);
1640 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1642 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1643 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1645 lsc_block_store<T, NElts, DS, L1H, L2H>(p, vals, __ESIMD_NS::simd_mask<1>(1),
1683 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1685 typename AccessorTy,
1686 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1687 __ESIMD_API std::enable_if_t<
1688 __ESIMD_DNS::is_device_accessor_with_v<
1689 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1690 __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1692 __ESIMD_NS::simd<T, NElts> vals,
1693 __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1695 L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1696 __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(acc, offset, vals,
1700 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1702 typename AccessorTy,
1703 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1704 __ESIMD_API std::enable_if_t<
1705 __ESIMD_DNS::is_local_accessor_with_v<
1706 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1707 __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1709 __ESIMD_NS::simd<T, NElts> vals, FlagsT flags = FlagsT{}) {
1710 lsc_slm_block_store<T, NElts, DS>(
1711 offset + __ESIMD_DNS::localAccessorToOffset(acc), vals, flags);
1744 template <
typename T,
int NElts,
lsc_data_size DS = lsc_data_size::default_size,
1746 typename AccessorTy,
1747 typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1748 __ESIMD_API std::enable_if_t<
1749 __ESIMD_DNS::is_accessor_with_v<
1750 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1751 __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1753 __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
1754 lsc_block_store<T, NElts, DS, L1H, L2H>(acc, offset, vals,
1755 __ESIMD_NS::simd_mask<1>(1), flags);
1788 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
1789 bool Transposed =
false,
bool Transformed =
false,
1792 T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1793 __ESIMD_API __ESIMD_NS::simd<T, N>
1794 lsc_load_2d(
const T *Ptr,
unsigned SurfaceWidth,
unsigned SurfaceHeight,
1795 unsigned SurfacePitch,
int X,
int Y) {
1796 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1797 return __ESIMD_DNS::load_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1798 Transposed, Transformed, PropertyListT>(
1799 Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
1824 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
1827 T, NBlocks, BlockHeight, BlockWidth,
false,
false>()>
1829 unsigned SurfaceHeight,
unsigned SurfacePitch,
1831 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1832 __ESIMD_DNS::prefetch_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1833 PropertyListT>(Ptr, SurfaceWidth, SurfaceHeight,
1834 SurfacePitch, X, Y);
1861 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
1864 T, 1u, BlockHeight, BlockWidth,
false,
false>()>
1866 unsigned SurfaceHeight,
unsigned SurfacePitch,
1867 int X,
int Y, __ESIMD_NS::simd<T, N> Vals) {
1868 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1869 __ESIMD_DNS::store_2d_impl<T, BlockWidth, BlockHeight, PropertyListT>(
1870 Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y, Vals);
1880 template <
typename T,
int BlockW
idth,
int BlockHeight,
int NBlocks>
1887 payload_data.template select<1, 1>(7) =
1888 ((NBlocks - 1) << 16) | ((BlockHeight - 1) << 8) | (BlockWidth - 1);
1895 : payload_data(other.payload) {}
1909 uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X,
1912 payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
1914 payload_data.template select<1, 1>(2) = SurfaceWidth;
1915 payload_data.template select<1, 1>(3) = SurfaceHeight;
1916 payload_data.template select<1, 1>(4) = SurfacePitch;
1917 payload_data.template select<1, 1>(5) = X;
1918 payload_data.template select<1, 1>(6) = Y;
1928 ->payload_data.template bit_cast_view<uint64_t>()[0]));
1937 ->payload_data.template select<1, 1>(2);
1946 ->payload_data.template select<1, 1>(3);
1955 ->payload_data.template select<1, 1>(4);
1964 ->payload_data.template select<1, 1>(5);
1973 ->payload_data.template select<1, 1>(6);
2000 payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
2011 payload_data.template select<1, 1>(2) = SurfaceWidth;
2021 payload_data.template select<1, 1>(3) = SurfaceHeight;
2031 payload_data.template select<1, 1>(4) = SurfacePitch;
2041 payload_data.template select<1, 1>(5) = X;
2051 payload_data.template select<1, 1>(6) = Y;
2056 __ESIMD_NS::simd<uint32_t, 16> get_raw_data() {
return payload_data; }
2057 __ESIMD_NS::simd<uint32_t, 16> payload_data;
2059 template <
typename T1,
int BlockWidth1,
int BlockHeight1,
int NBlocks1,
2062 friend ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T1, N>
lsc_load_2d(
2065 template <
typename T1,
int BlockWidth1,
int BlockHeight1,
int NBlocks1,
2069 __ESIMD_NS::simd<T1, N> Data);
2071 template <
typename T1,
int BlockWidth1,
int BlockHeight1,
int NBlocks1,
2097 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
2098 bool Transposed =
false,
bool Transformed =
false,
2101 T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
2104 __ESIMD_DNS::check_lsc_block_2d_restrictions<
2105 T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
2106 __ESIMD_DNS::block_2d_op::load>();
2107 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2110 constexpr
int ElemsPerDword = 4 /
sizeof(T);
2111 constexpr
int GRFRowSize = Transposed ? BlockHeight
2112 : Transformed ? BlockWidth * ElemsPerDword
2114 constexpr
int GRFRowPitch = __ESIMD_DNS::getNextPowerOf2<GRFRowSize>();
2115 constexpr
int GRFColSize =
2118 : (Transformed ? (BlockHeight + ElemsPerDword - 1) / ElemsPerDword
2120 constexpr
int GRFBlockSize = GRFRowPitch * GRFColSize;
2121 constexpr
int GRFBlockPitch =
2122 __ESIMD_DNS::roundUpNextMultiple<64 /
sizeof(T), GRFBlockSize>();
2123 constexpr
int ActualN = NBlocks * GRFBlockPitch;
2125 constexpr
int DstBlockElements = GRFColSize * GRFRowSize;
2126 constexpr
int DstElements = DstBlockElements * NBlocks;
2128 constexpr uint32_t GrfBytes = 64;
2129 constexpr uint32_t DstBlockSize =
2130 __ESIMD_DNS::roundUpNextMultiple<DstElements * sizeof(T), GrfBytes>();
2131 constexpr uint32_t DstLength =
2132 (DstBlockSize / GrfBytes) > 31 ? 31 : (DstBlockSize / GrfBytes);
2133 constexpr uint32_t DstLengthMask = DstLength << 20;
2135 static_assert(N == ActualN || N == DstElements,
"Incorrect element count");
2137 constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
2139 constexpr uint32_t base_desc = 0x2000003;
2140 constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
2141 constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
2142 constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2143 __ESIMD_NS::simd<T, N> oldDst;
2144 constexpr uint32_t exDesc = 0x0;
2145 constexpr uint32_t desc = base_desc | cache_mask | transformMask |
2146 transposeMask | dataSizeMask | DstLengthMask;
2147 constexpr uint8_t execSize = 1;
2148 constexpr uint8_t sfid = 0xF;
2149 constexpr uint8_t numSrc0 = 0x1;
2150 constexpr uint8_t numDst = (N *
sizeof(T)) / 64;
2151 __ESIMD_NS::simd<T, ActualN> Raw =
2152 __ESIMD_NS::raw_send<execSize, sfid, numSrc0, numDst>(
2153 oldDst, payload.get_raw_data(), exDesc, desc);
2155 if constexpr (ActualN == N) {
2161 __ESIMD_NS::simd<T, DstElements> Dst;
2163 for (
auto i = 0; i < NBlocks; i++) {
2165 Dst.template select<DstBlockElements, 1>(i * DstBlockElements);
2167 auto RawBlock = Raw.template select<GRFBlockSize, 1>(i * GRFBlockPitch);
2168 DstBlock = RawBlock.template bit_cast_view<T, GRFColSize, GRFRowPitch>()
2169 .template select<GRFColSize, 1, GRFRowSize, 1>(0, 0)
2170 .template bit_cast_view<T>();
2193 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
2194 bool Transposed =
false,
bool Transformed =
false,
2197 T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
2200 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2203 __ESIMD_DNS::check_lsc_block_2d_restrictions<
2204 T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
2206 static_assert(!Transposed || !Transformed,
2207 "Transposed and transformed is not supported");
2208 constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
2210 constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2211 constexpr uint32_t base_desc = 0x2000003;
2212 constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
2213 constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
2214 constexpr uint32_t exDesc = 0x0;
2215 constexpr uint32_t desc =
2216 base_desc | cache_mask | transformMask | transposeMask | dataSizeMask;
2217 constexpr uint8_t execSize = 1;
2218 constexpr uint8_t sfid = 0xF;
2219 constexpr uint8_t numDst = (N *
sizeof(T)) / 64;
2220 __ESIMD_NS::raw_send<execSize, sfid, numDst>(payload.get_raw_data(), exDesc,
2239 template <
typename T,
int BlockWidth,
int BlockHeight = 1,
int NBlocks = 1,
2242 T, NBlocks, BlockHeight, BlockWidth,
false,
false>()>
2243 ESIMD_INLINE SYCL_ESIMD_FUNCTION
void
2245 __ESIMD_NS::simd<T, N> Data) {
2246 __ESIMD_DNS::check_lsc_block_2d_restrictions<
2247 T, BlockWidth, BlockHeight, NBlocks,
false,
false,
2248 __ESIMD_DNS::block_2d_op::store>();
2249 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2253 constexpr uint32_t cache_mask = detail::get_lsc_store_cache_mask<L1H, L2H>()
2255 constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2256 constexpr uint32_t base_desc = 0x2000007;
2258 constexpr uint32_t exDesc = 0x0;
2259 constexpr uint32_t desc = base_desc | cache_mask | dataSizeMask;
2260 constexpr uint8_t execSize = 1;
2261 constexpr uint8_t sfid = 0xF;
2262 constexpr uint8_t numSrc0 = 0x1;
2263 constexpr uint8_t numSrc1 = (N *
sizeof(T)) / 64;
2265 __ESIMD_NS::raw_sends<execSize, sfid, numSrc0, numSrc1>(
2266 payload.get_raw_data(), Data, exDesc, desc);
2276 template <
typename T, __ESIMD_NS::atomic_op Op>
2279 __ESIMD_DNS::to_lsc_atomic_op<Op>();
2280 return static_cast<int>(LSCOp);
2299 __ESIMD_API __ESIMD_NS::simd<T, N>
2301 __ESIMD_NS::simd_mask<N> pred) {
2302 return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, pred);
2321 __ESIMD_API __ESIMD_NS::simd<T, N>
2323 __ESIMD_NS::simd<T, N>
src0,
2324 __ESIMD_NS::simd_mask<N> pred) {
2325 return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets,
src0, pred);
2345 __ESIMD_API __ESIMD_NS::simd<T, N>
2347 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2348 __ESIMD_NS::simd_mask<N> pred) {
2349 return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets,
src0,
src1,
2371 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0,
2372 __ESIMD_NS::simd<T, N>>
2374 __ESIMD_NS::simd_mask<N> pred) {
2375 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2376 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2384 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2385 __ESIMD_DNS::get_num_args<Op>() == 0,
2386 __ESIMD_NS::simd<T, N>>
2388 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2389 p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
2411 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2412 __ESIMD_NS::simd<T, N>>
2414 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd_mask<N> pred) {
2415 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2416 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2417 p, offsets,
src0, pred);
2423 typename OffsetObjT,
typename RegionTy>
2424 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2425 __ESIMD_NS::simd<T, N>>
2427 __ESIMD_NS::simd<T, N>
src0,
2428 __ESIMD_NS::simd_mask<N> pred = 1) {
2429 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(),
src0,
2437 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2438 __ESIMD_DNS::get_num_args<Op>() == 1 &&
2439 ((Op != __ESIMD_NS::atomic_op::store &&
2440 Op != __ESIMD_NS::atomic_op::xchg) ||
2442 __ESIMD_NS::simd<T, N>>
2444 __ESIMD_NS::simd_mask<N> pred = 1) {
2445 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2446 p, __ESIMD_NS::simd<Toffset, N>(offset),
src0, pred);
2469 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2470 __ESIMD_NS::simd<T, N>>
2472 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2473 __ESIMD_NS::simd_mask<N> pred) {
2474 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2475 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2482 typename OffsetObjT,
typename RegionTy>
2483 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2484 __ESIMD_NS::simd<T, N>>
2486 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2487 __ESIMD_NS::simd_mask<N> pred = 1) {
2488 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(),
src0,
2496 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2497 __ESIMD_DNS::get_num_args<Op>() == 2,
2498 __ESIMD_NS::simd<T, N>>
2500 __ESIMD_NS::simd<T, N>
src1,
2501 __ESIMD_NS::simd_mask<N> pred = 1) {
2502 return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2503 p, __ESIMD_NS::simd<Toffset, N>(offset),
src0,
src1, pred);
2526 typename AccessorTy,
typename Toffset>
2527 __ESIMD_API std::enable_if_t<
2528 __ESIMD_DNS::is_device_accessor_with_v<
2529 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
2530 (Op == __ESIMD_NS::atomic_op::load ||
2531 __ESIMD_DNS::is_device_accessor_with_v<
2532 AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>),
2533 __ESIMD_NS::simd<T, N>>
2535 __ESIMD_NS::simd_mask<N> pred) {
2536 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2537 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2538 acc, offsets, pred);
2559 typename AccessorTy>
2560 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2561 __ESIMD_NS::simd<T, N>>
2563 __ESIMD_NS::simd_mask<N> pred) {
2564 return lsc_slm_atomic_update<Op, T, N, DS>(
2565 offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
2589 typename AccessorTy,
typename Toffset>
2590 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2591 __ESIMD_NS::simd<T, N>>
2593 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd_mask<N> pred) {
2594 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2595 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2596 acc, offsets,
src0, pred);
2618 typename AccessorTy>
2619 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2620 __ESIMD_NS::simd<T, N>>
2622 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd_mask<N> pred) {
2623 return lsc_slm_atomic_update<Op, T, N, DS>(
2624 offsets + __ESIMD_DNS::localAccessorToOffset(acc),
src0, pred);
2649 typename AccessorTy,
typename Toffset>
2650 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2651 __ESIMD_NS::simd<T, N>>
2653 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2654 __ESIMD_NS::simd_mask<N> pred) {
2655 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2656 return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2680 typename AccessorTy>
2681 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2682 __ESIMD_NS::simd<T, N>>
2684 __ESIMD_NS::simd<T, N>
src0, __ESIMD_NS::simd<T, N>
src1,
2685 __ESIMD_NS::simd_mask<N> pred) {
2686 return lsc_slm_atomic_update<Op, T, N, DS>(
2687 offsets + __ESIMD_DNS::localAccessorToOffset(acc),
src0,
src1, pred);
2698 template <lsc_memory_kind Kind = lsc_memory_kind::untyped_global,
2699 lsc_fence_op FenceOp = lsc_fence_op::none,
2700 lsc_scope Scope = lsc_scope::group,
int N = 16>
2704 Kind != lsc_memory_kind::shared_local ||
2705 (FenceOp == lsc_fence_op::none && Scope == lsc_scope::group),
2706 "SLM fence must have 'none' lsc_fence_op and 'group' scope");
2707 static_assert(Kind != lsc_memory_kind::untyped_global_low_pri,
2708 "lsc_memory_kind::untyped_global_low_pri is not supported in HW"
2709 " and/or GPU drivers");
2710 __esimd_lsc_fence<static_cast<uint8_t>(Kind),
static_cast<uint8_t
>(FenceOp),
2711 static_cast<uint8_t
>(Scope), N>(pred.data());
2724 #ifdef __SYCL_DEVICE_ONLY__
2725 return __spirv_BuiltInGlobalHWThreadIDINTEL();
2732 #ifdef __SYCL_DEVICE_ONLY__
2733 return __spirv_BuiltInSubDeviceIDINTEL();
2748 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2749 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2750 __ESIMD_DNS::get_num_args<Op>() == 0,
2753 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2759 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0,
simd<T, N>>
2762 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2766 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2767 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2768 __ESIMD_DNS::get_num_args<Op>() == 0,
2771 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2776 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2777 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2778 __ESIMD_DNS::get_num_args<Op>() == 1,
2782 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2783 p, offset,
src0, mask);
2788 __ESIMD_API __ESIMD_API
2789 std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
simd<T, N>>
2792 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2793 p, offsets,
src0, mask);
2796 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2797 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2798 __ESIMD_DNS::get_num_args<Op>() == 1,
2801 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2802 p, offset,
src0, mask);
2806 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2807 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2808 __ESIMD_DNS::get_num_args<Op>() == 2,
2815 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2821 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
simd<T, N>>
2824 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2828 template <native::lsc::atomic_op Op,
typename T,
int N,
typename Toffset>
2829 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2830 __ESIMD_DNS::get_num_args<Op>() == 2,
2831 __ESIMD_NS::simd<T, N>>
2834 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2839 typename AccessorTy>
2840 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2841 __ESIMD_DNS::get_num_args<Op>() == 0 &&
2842 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2845 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2850 typename RegionTy,
typename AccessorTy>
2851 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0 &&
2852 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2856 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2857 acc, offsets, mask);
2861 typename AccessorTy>
2862 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2863 __ESIMD_DNS::get_num_args<Op>() == 0 &&
2864 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2867 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2873 typename AccessorTy>
2874 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2875 __ESIMD_DNS::get_num_args<Op>() == 1 &&
2876 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2880 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2881 acc, offset,
src0, mask);
2885 typename RegionTy,
typename AccessorTy>
2886 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
2887 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2891 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2892 acc, offsets,
src0, mask);
2896 typename AccessorTy>
2897 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2898 __ESIMD_DNS::get_num_args<Op>() == 1 &&
2899 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2903 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2904 acc, offset,
src0, mask);
2909 typename AccessorTy>
2910 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2911 __ESIMD_DNS::get_num_args<Op>() == 2 &&
2912 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2919 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2924 typename RegionTy,
typename AccessorTy>
2925 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
2926 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2930 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2935 typename AccessorTy>
2936 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2937 __ESIMD_DNS::get_num_args<Op>() == 2 &&
2938 __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2939 __ESIMD_NS::simd<T, N>>
2942 return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
This class represents a reference to a sub-region of a base simd object.
The main simd vector class.
RAII-style class used to implement "semi-dynamic" SLM allocation.
~slm_allocator()
Releases the SLM chunk allocated in the constructor.
slm_allocator()
Allocates the amount of SLM which is class' template parameter.
ESIMD_INLINE int get_offset() const
Container class to hold parameters for load2d/store2d functions
T * get_data_pointer() const
Get a surface base address
uint32_t get_surface_pitch() const
Get surface pitch
config_2d_mem_access & set_x(int32_t X)
Sets top left corner X coordinate of the block
constexpr int32_t get_number_of_blocks() const
Get number of blocks
constexpr int32_t get_width() const
Get width of the block
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_store_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload, sycl::ext::intel::esimd::simd< T1, N > Data)
config_2d_mem_access & set_surface_width(uint32_t SurfaceWidth)
Sets surface width
uint32_t get_surface_height() const
Get surface height
config_2d_mem_access()
Default constructor
int32_t get_x() const
Get top left corner X coordinate of the block
config_2d_mem_access & set_data_pointer(T *Ptr)
Sets surface base address
config_2d_mem_access & set_surface_height(uint32_t SurfaceHeight)
Sets surface height
config_2d_mem_access(const config_2d_mem_access &other)
Copy constructor
config_2d_mem_access & set_y(int32_t Y)
Sets top left corner Y coordinate of the block
config_2d_mem_access(const T *Ptr, uint32_t SurfaceWidth, uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X, int32_t Y)
Constructor
config_2d_mem_access & set_surface_pitch(uint32_t SurfacePitch)
Sets surface pitch
constexpr int32_t get_height() const
Get height of the block
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd< T1, N > lsc_load_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
uint32_t get_surface_width() const
Get surface width
int32_t get_y() const
Get top left corner Y coordinate of the block
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
("use sycl::ext::intel::esimd::memory_kind") lsc_memory_kind __ESIMD_DNS::lsc_data_size lsc_data_size
The scope that lsc_fence operation should apply to Supported platforms: DG2, PVC.
sycl::ext::intel::esimd::cache_hint cache_hint
L1 or L2 cache hint kinds.
atomic_op
Represents an atomic operation.
split_barrier_action
Represents a split barrier action.
__ESIMD_API int32_t get_subdevice_id()
Get subdevice ID.
__ESIMD_API int32_t get_hw_thread_id()
Get HW Thread ID.
__ESIMD_API SZ simd< T, SZ > src1
__SYCL_DEPRECATED("Please use sycl::ext::intel::esimd::addc(carry, src0, src1);") __ESIMD_API sycl
__ESIMD_API void lsc_slm_block_store(uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{})
Transposed SLM scatter with 1 channel.
__ESIMD_API void lsc_prefetch(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer prefetch gather.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, sycl::ext::intel::esimd::simd< T, N > > lsc_atomic_update(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
USM pointer atomic.
__ESIMD_API void lsc_slm_scatter(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM scatter.
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block load.
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_slm_atomic_update(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
SLM atomic.
__ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, sycl::ext::intel::esimd::simd< T, N > Vals)
2D USM pointer block store.
atomic_op
LSC atomic operation codes.
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > lsc_slm_block_load(uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
Transposed SLM gather with 1 channel.
__ESIMD_API void lsc_fence(sycl::ext::intel::esimd::simd_mask< N > pred=1)
Memory fence.
__ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block prefetch.
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_slm_gather(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM gather.
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > lsc_block_load(const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed gather with 1 channel.
__ESIMD_API void lsc_scatter(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer scatter.
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > lsc_block_store(T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed scatter with 1 channel.
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_gather(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer gather.
__ESIMD_API void named_barrier_wait(uint8_t id)
Wait on a named barrier Available only on PVC.
__ESIMD_API std::enable_if_t<(sizeof(T) *N >=2)> wait(sycl::ext::intel::esimd::simd< T, N > value)
Create explicit scoreboard dependency to avoid device code motion across this call and preserve the v...
__ESIMD_API void named_barrier_init()
Initialize number of named barriers for a kernel Available only on PVC.
__ESIMD_API void named_barrier_signal(uint8_t barrier_id, uint8_t producer_consumer_mode, uint32_t num_producers, uint32_t num_consumers)
Perform signal operation for the given named barrier Available only on PVC.
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > atomic_update(AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); ...
__ESIMD_API void split_barrier()
Generic work-group split barrier.
__ESIMD_API std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > prefetch(AccessorT acc, PropertyListT props={})
template <typename T, int VS = 1, typename AccessorT, typename PropertyListT = empty_properties_t> vo...
__ESIMD_API void barrier()
Generic work-group barrier.
@ global_coherent_fence
“Commit enable” - wait for fence to complete before continuing.
@ local_barrier
Issue SLM memory barrier only. If not set, the memory barrier is global.
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_send(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw send.
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_sends(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, sycl::ext::intel::esimd::simd< T3, n3 > msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw sends.
typename make_L1_L2_alignment_properties< L1H, L2H, Alignment >::type make_L1_L2_alignment_properties_t
uint32_t DeviceAccessorOffsetT
constexpr alignment_key::value_t< K > alignment
cache_hint
L1, L2 or L3 cache hints.
constexpr uint32_t get_lsc_store_cache_mask()
constexpr int get_lsc_block_2d_data_size()
constexpr int lsc_to_internal_atomic_op()
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, N > lsc_format_input(sycl::ext::intel::esimd::simd< T, N > Vals)
constexpr uint32_t get_lsc_load_cache_mask()
ESIMD_INLINE sycl::ext::intel::esimd::simd< T, N > lsc_format_ret(sycl::ext::intel::esimd::simd< T1, N > Vals)
constexpr uint32_t get_lsc_data_size()
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.