DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory.hpp
Go to the documentation of this file.
1 //==-------------- memory.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement experimental Explicit SIMD memory-access APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
18 
19 namespace sycl {
20 inline namespace _V1 {
21 namespace ext::intel {
22 namespace experimental::esimd {
23 
26 
29 template <split_barrier_action flag> __ESIMD_API void split_barrier() {
30  __esimd_sbarrier(flag);
31 }
32 
34 
37 
62 template <typename T1, int n1, typename T2, int n2, typename T3, int n3,
63  int N = 16>
64 __ESIMD_API __ESIMD_NS::simd<T1, n1>
65 raw_sends(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
66  __ESIMD_NS::simd<T3, n3> msgSrc1, uint32_t exDesc, uint32_t msgDesc,
67  uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
68  uint8_t numDst, uint8_t isEOT = 0, uint8_t isSendc = 0,
69  __ESIMD_NS::simd_mask<N> mask = 1) {
70  constexpr unsigned _Width1 = n1 * sizeof(T1);
71  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
72  constexpr unsigned _Width2 = n2 * sizeof(T2);
73  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
74  constexpr unsigned _Width3 = n3 * sizeof(T3);
75  static_assert(_Width3 % 32 == 0, "Invalid size for raw send msgSrc1");
76 
77  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
78  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
79  using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
80 
81  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
82  return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, N>(
83  modifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid, exDesc,
84  msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
85 }
86 
108 template <typename T1, int n1, typename T2, int n2, int N = 16>
109 __ESIMD_API __ESIMD_NS::simd<T1, n1>
110 raw_send(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
111  uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
112  uint8_t numSrc0, uint8_t numDst, uint8_t isEOT = 0,
113  uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
114  constexpr unsigned _Width1 = n1 * sizeof(T1);
115  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
116  constexpr unsigned _Width2 = n2 * sizeof(T2);
117  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
118 
119  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
120  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
121 
122  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
123  return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, N>(
124  modifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc, msgDesc,
125  msgSrc0.data(), msgDst.data());
126 }
127 
148 template <typename T1, int n1, typename T2, int n2, int N = 16>
149 __ESIMD_API void
150 raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0, __ESIMD_NS::simd<T2, n2> msgSrc1,
151  uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
152  uint8_t numSrc0, uint8_t numSrc1, uint8_t isEOT = 0,
153  uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
154  constexpr unsigned _Width1 = n1 * sizeof(T1);
155  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
156  constexpr unsigned _Width2 = n2 * sizeof(T2);
157  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc1");
158 
159  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
160  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
161 
162  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
163  __esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, N>(
164  modifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc, msgDesc,
165  msgSrc0.data(), msgSrc1.data());
166 }
167 
186 template <typename T1, int n1, int N = 16>
187 __ESIMD_API void
188 raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
189  uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0,
190  uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
191  constexpr unsigned _Width1 = n1 * sizeof(T1);
192  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
193  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
194  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
195  __esimd_raw_send2_noresult<ElemT1, n1, N>(modifier, execSize, mask.data(),
196  numSrc0, sfid, exDesc, msgDesc,
197  msgSrc0.data());
198 }
199 
201 
204 
207 
211 template <typename T, int N>
212 __ESIMD_API std::enable_if_t<(sizeof(T) * N >= 2)>
213 wait(__ESIMD_NS::simd<T, N> value) {
214 #ifdef __SYCL_DEVICE_ONLY__
215  uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
216  __esimd_wait(Word);
217 #endif // __SYCL_DEVICE_ONLY__
218 }
219 
223 template <typename T, typename RegionT>
224 __ESIMD_API std::enable_if_t<
225  (RegionT::length * sizeof(typename RegionT::element_type) >= 2)>
226 wait(__ESIMD_NS::simd_view<T, RegionT> value) {
227 #ifdef __SYCL_DEVICE_ONLY__
228  uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
229  __esimd_wait(Word);
230 #endif // __SYCL_DEVICE_ONLY__
231 }
232 
234 
237 
240 
241 namespace detail {
242 // Compute the data size for 2d block load or store.
243 template <typename T, int NBlocks, int Height, int Width, bool Transposed,
244  bool Transformed>
245 constexpr int get_lsc_block_2d_data_size() {
246  return __ESIMD_DNS::get_lsc_block_2d_data_size<T, NBlocks, Height, Width,
247  Transposed, Transformed>();
248 }
249 
250 // Format u8 and u16 to u8u32 and u16u32 by doing garbage-extension.
251 template <typename RT, typename T, int N>
252 ESIMD_INLINE __ESIMD_NS::simd<RT, N>
253 lsc_format_input(__ESIMD_NS::simd<T, N> Vals) {
254  return __ESIMD_DNS::lsc_format_input<RT, T, N>(Vals);
255 }
256 
257 // Format u8u32 and u16u32 back to u8 and u16.
258 template <typename T, typename T1, int N>
259 ESIMD_INLINE __ESIMD_NS::simd<T, N>
260 lsc_format_ret(__ESIMD_NS::simd<T1, N> Vals) {
261  return __ESIMD_DNS::lsc_format_ret<T, T1, N>(Vals);
262 }
263 
264 template <typename T> constexpr uint32_t get_lsc_data_size() {
265  switch (sizeof(T)) {
266  case 1:
267  return 0;
268  case 2:
269  return 1;
270  case 4:
271  return 2;
272  case 8:
273  return 3;
274  default:
275  static_assert(true, "Unsupported data type.");
276  }
277 }
278 
279 template <cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
280 constexpr uint32_t get_lsc_load_cache_mask() {
281  if constexpr (L1H == cache_hint::read_invalidate &&
282  L2H == cache_hint::cached) {
283  return 7;
284  }
285  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
286  return 6;
287  }
288  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
289  return 5;
290  }
291  if constexpr (L1H == cache_hint::cached && L2H == cache_hint::cached) {
292  return 4;
293  }
294  if constexpr (L1H == cache_hint::cached && L2H == cache_hint::uncached) {
295  return 3;
296  }
297  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
298  return 2;
299  }
300  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
301  return 1;
302  }
303  return 0;
304 }
305 
306 template <cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
307 constexpr uint32_t get_lsc_store_cache_mask() {
308  if constexpr (L1H == cache_hint::write_back && L2H == cache_hint::cached) {
309  return 7;
310  }
311  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
312  return 6;
313  }
314  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
315  return 5;
316  }
317  if constexpr (L1H == cache_hint::write_through && L2H == cache_hint::cached) {
318  return 4;
319  }
320  if constexpr (L1H == cache_hint::write_through &&
321  L2H == cache_hint::uncached) {
322  return 3;
323  }
324  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
325  return 2;
326  }
327  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
328  return 1;
329  }
330  return 0;
331 }
332 
333 } // namespace detail
334 
350 template <typename T, int NElts = 1,
351  lsc_data_size DS = lsc_data_size::default_size, int N>
352 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
353 lsc_slm_gather(__ESIMD_NS::simd<uint32_t, N> offsets,
354  __ESIMD_NS::simd_mask<N> pred = 1) {
355  __ESIMD_NS::simd<T, N * NElts> pass_thru;
356  return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
357 }
358 
376 template <typename T, int NElts = 1,
377  lsc_data_size DS = lsc_data_size::default_size, int N>
378 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
379 lsc_slm_gather(__ESIMD_NS::simd<uint32_t, N> offsets,
380  __ESIMD_NS::simd_mask<N> pred,
381  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
382  return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
383 }
384 
400 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
401  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
402 __ESIMD_API __ESIMD_NS::simd<T, NElts>
403 lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred = 1,
404  FlagsT flags = FlagsT{}) {
405  __ESIMD_NS::properties Props{__ESIMD_NS::alignment<
406  FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
407  return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, Props);
408 }
409 
427 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
428  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
429 __ESIMD_API __ESIMD_NS::simd<T, NElts>
430 lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
431  __ESIMD_NS::simd<T, NElts> pass_thru) {
432  __ESIMD_NS::properties Props{__ESIMD_NS::alignment<
433  FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
434  return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, pass_thru, Props);
435 }
436 
455 template <typename T, int NElts = 1,
456  lsc_data_size DS = lsc_data_size::default_size,
458  int N, typename Toffset>
459 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
460 lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
461  __ESIMD_NS::simd_mask<N> pred = 1) {
462  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
463  __ESIMD_NS::simd<T, N * NElts> PassThru; // Intentionally undefined.
464  return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
465  PassThru);
466 }
467 
488 template <typename T, int NElts = 1,
489  lsc_data_size DS = lsc_data_size::default_size,
491  int N, typename Toffset>
492 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
493 lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
494  __ESIMD_NS::simd_mask<N> pred,
495  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
496  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
497  return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
498  pass_thru);
499 }
500 
501 template <typename T, int NElts = 1,
502  lsc_data_size DS = lsc_data_size::default_size,
504  int N, typename OffsetObjT, typename RegionTy>
505 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
506 lsc_gather(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
507  __ESIMD_NS::simd_mask<N> pred = 1) {
508  return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
509 }
510 
511 template <typename T, int NElts = 1,
512  lsc_data_size DS = lsc_data_size::default_size,
514  int N, typename OffsetObjT, typename RegionTy>
515 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
516 lsc_gather(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
517  __ESIMD_NS::simd_mask<N> pred,
518  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
519  return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred,
520  pass_thru);
521 }
522 
523 template <typename T, int NElts = 1,
524  lsc_data_size DS = lsc_data_size::default_size,
526  int N, typename Toffset>
527 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
528  __ESIMD_NS::simd<T, N * NElts>>
529 lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
530  return lsc_gather<T, NElts, DS, L1H, L2H, N>(
531  p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
532 }
533 
534 template <typename T, int NElts = 1,
535  lsc_data_size DS = lsc_data_size::default_size,
537  int N, typename Toffset>
538 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
539  __ESIMD_NS::simd<T, N * NElts>>
540 lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred,
541  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
542  return lsc_gather<T, NElts, DS, L1H, L2H, N>(
543  p, __ESIMD_NS::simd<Toffset, N>(offset), pred, pass_thru);
544 }
545 
565 template <typename T, int NElts = 1,
566  lsc_data_size DS = lsc_data_size::default_size,
568  int N, typename AccessorTy>
569 __ESIMD_API
570  std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
571  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
572  __ESIMD_NS::simd<T, N * NElts>>
573  lsc_gather(AccessorTy acc,
574  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
575  __ESIMD_NS::simd_mask<N> pred = 1) {
576 #ifdef __ESIMD_FORCE_STATELESS_MEM
577  return lsc_gather<T, NElts, DS, L1H, L2H>(
578  __ESIMD_DNS::accessorToPointer<T>(acc), offsets, pred);
579 #else
580  __ESIMD_NS::simd<T, N * NElts> PassThru; // Intentionally uninitialized.
581  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
582  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
583  acc, offsets, pred, PassThru);
584 #endif // __ESIMD_FORCE_STATELESS_MEM
585 }
586 
587 #ifdef __ESIMD_FORCE_STATELESS_MEM
588 template <typename T, int NElts = 1,
589  lsc_data_size DS = lsc_data_size::default_size,
591  int N, typename AccessorTy, typename Toffset>
592 __ESIMD_API std::enable_if_t<
593  __ESIMD_DNS::is_device_accessor_with_v<
594  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
595  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
596  __ESIMD_NS::simd<T, N * NElts>>
597 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
598  __ESIMD_NS::simd_mask<N> pred = 1) {
599  return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
600  acc, convert<uint64_t>(offsets), pred);
601 }
602 #endif
603 
604 template <typename T, int NElts = 1,
605  lsc_data_size DS = lsc_data_size::default_size,
607  int N, typename AccessorTy>
608 __ESIMD_API
609  std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
610  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
611  __ESIMD_NS::simd<T, N * NElts>>
612  lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
613  __ESIMD_NS::simd_mask<N> pred = 1) {
614  return lsc_slm_gather<T, NElts, DS>(
615  offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
616 }
617 
639 template <typename T, int NElts = 1,
640  lsc_data_size DS = lsc_data_size::default_size,
642  int N, typename AccessorTy>
643 __ESIMD_API
644  std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
645  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
646  __ESIMD_NS::simd<T, N * NElts>>
647  lsc_gather(AccessorTy acc,
648  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
649  __ESIMD_NS::simd_mask<N> pred,
650  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
651 #ifdef __ESIMD_FORCE_STATELESS_MEM
652  return lsc_gather<T, NElts, DS, L1H, L2H>(
653  __ESIMD_DNS::accessorToPointer<T>(acc), offsets, pred, pass_thru);
654 
655 #else
656  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
657  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
658  acc, offsets, pred, pass_thru);
659 #endif // __ESIMD_FORCE_STATELESS_MEM
660 }
661 
662 #ifdef __ESIMD_FORCE_STATELESS_MEM
663 template <typename T, int NElts = 1,
664  lsc_data_size DS = lsc_data_size::default_size,
666  int N, typename AccessorTy, typename Toffset>
667 __ESIMD_API std::enable_if_t<
668  __ESIMD_DNS::is_device_accessor_with_v<
669  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
670  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
671  __ESIMD_NS::simd<T, N * NElts>>
672 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
673  __ESIMD_NS::simd_mask<N> pred,
674  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
675  return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
676  acc, convert<uint64_t>(offsets), pred, pass_thru);
677 }
678 #endif
679 
680 template <typename T, int NElts = 1,
681  lsc_data_size DS = lsc_data_size::default_size,
683  int N, typename AccessorTy>
684 __ESIMD_API std::enable_if_t<
685  sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>,
686  __ESIMD_NS::simd<T, N * NElts>>
687 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
688  __ESIMD_NS::simd_mask<N> pred,
689  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
690  return lsc_slm_gather<T, NElts, DS>(
691  offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru);
692 }
693 
729 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
731  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
732 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
733  __ESIMD_NS::simd<T, NElts>>
734 lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
736  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
737  __ESIMD_NS::simd<T, NElts> PassThru; // Intentionally undefined.
738  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
739  PassThru);
740 }
741 
770 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
772  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
773 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
774  __ESIMD_NS::simd<T, NElts>>
775 lsc_block_load(const T *p, FlagsT) {
777  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
778  __ESIMD_NS::simd<T, NElts> PassThru; // Intentionally undefined.
779  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(
780  p, __ESIMD_NS::simd_mask<1>(1), PassThru);
781 }
782 
814 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
816  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
817 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
818  __ESIMD_NS::simd<T, NElts>>
819 lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred,
820  __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
822  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
823  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
824  pass_thru);
825 }
826 
858 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
860  typename AccessorTy,
861  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
862 __ESIMD_API std::enable_if_t<
863  __ESIMD_DNS::is_device_accessor_with_v<
864  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
865  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
866  __ESIMD_NS::simd<T, NElts>>
868  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
870  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
871  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
872  pred);
873 }
874 
875 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
877  typename AccessorTy,
878  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
879 __ESIMD_API std::enable_if_t<
880  __ESIMD_DNS::is_local_accessor_with_v<
881  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
882  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
883  __ESIMD_NS::simd<T, NElts>>
884 lsc_block_load(AccessorTy acc, uint32_t offset,
885  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
886  return lsc_slm_block_load<T, NElts, DS>(
887  offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, flags);
888 }
889 
917 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
919  typename AccessorTy,
920  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
921 __ESIMD_API std::enable_if_t<
922  __ESIMD_DNS::is_device_accessor_with_v<
923  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
924  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
925  __ESIMD_NS::simd<T, NElts>>
927  FlagsT flags) {
928  return lsc_block_load<T, NElts, DS, L1H, L2H>(
929  acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
930 }
931 
932 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
934  typename AccessorTy,
935  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
936 __ESIMD_API std::enable_if_t<
937  __ESIMD_DNS::is_local_accessor_with_v<
938  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
939  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
940  __ESIMD_NS::simd<T, NElts>>
941 lsc_block_load(AccessorTy acc, uint32_t offset, FlagsT flags) {
942  return lsc_block_load<T, NElts, DS, L1H, L2H>(
943  acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
944 }
945 
978 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
980  typename AccessorTy,
981  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
982 __ESIMD_API std::enable_if_t<
983  __ESIMD_DNS::is_device_accessor_with_v<
984  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
985  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
986  __ESIMD_NS::simd<T, NElts>>
988  __ESIMD_NS::simd_mask<1> pred,
989  __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
991  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
992  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
993  pred, pass_thru);
994 }
995 
996 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
998  typename AccessorTy,
999  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1000 __ESIMD_API std::enable_if_t<
1001  __ESIMD_DNS::is_local_accessor_with_v<
1002  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1003  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1004  __ESIMD_NS::simd<T, NElts>>
1005 lsc_block_load(AccessorTy acc, uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
1006  __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT flags = FlagsT{}) {
1007  return lsc_slm_block_load<T, NElts, DS>(
1008  offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru, flags);
1009 }
1010 
1027 template <typename T, int NElts = 1,
1028  lsc_data_size DS = lsc_data_size::default_size,
1030  int N, typename Toffset>
1031 __ESIMD_API void lsc_prefetch(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1032  __ESIMD_NS::simd_mask<N> pred = 1) {
1033  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1034  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, offsets, pred);
1035 }
1036 
1037 template <typename T, int NElts = 1,
1038  lsc_data_size DS = lsc_data_size::default_size,
1040  int N, typename OffsetObjT, typename RegionTy>
1041 __ESIMD_API void
1042 lsc_prefetch(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1043  __ESIMD_NS::simd_mask<N> pred = 1) {
1044  lsc_prefetch<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
1045 }
1046 
1047 template <typename T, int NElts = 1,
1048  lsc_data_size DS = lsc_data_size::default_size,
1050  int N, typename Toffset>
1051 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>>
1052 lsc_prefetch(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
1053  lsc_prefetch<T, NElts, DS, L1H, L2H, N>(
1054  p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
1055 }
1056 
1082 template <typename T, int NElts = 1,
1083  lsc_data_size DS = lsc_data_size::default_size,
1085  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1086 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1087 lsc_prefetch(const T *p, FlagsT = {}) {
1088  __ESIMD_NS::simd_mask<1> Mask = 1;
1089  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1090  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1091  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, 0, Mask);
1092 }
1093 
1111 template <typename T, int NElts = 1,
1112  lsc_data_size DS = lsc_data_size::default_size,
1114  int N, typename AccessorTy>
1115 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1116  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
1117 lsc_prefetch(AccessorTy acc,
1118  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1119  __ESIMD_NS::simd_mask<N> pred = 1) {
1120 #ifdef __ESIMD_FORCE_STATELESS_MEM
1121  lsc_prefetch<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1122  offsets, pred);
1123 #else
1124  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1125  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offsets, pred);
1126 #endif
1127 }
1128 
1129 #ifdef __ESIMD_FORCE_STATELESS_MEM
1130 template <typename T, int NElts = 1,
1131  lsc_data_size DS = lsc_data_size::default_size,
1133  int N, typename AccessorTy, typename Toffset>
1134 __ESIMD_API std::enable_if_t<
1135  __ESIMD_DNS::is_device_accessor_with_v<
1136  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1137  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1138 lsc_prefetch(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1139  __ESIMD_NS::simd_mask<N> pred = 1) {
1140  lsc_prefetch<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1141  acc, convert<uint64_t>(offsets), pred);
1142 }
1143 #endif
1144 
1172 template <typename T, int NElts = 1,
1173  lsc_data_size DS = lsc_data_size::default_size,
1175  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag,
1176  typename AccessorTy>
1177 __ESIMD_API std::enable_if_t<
1178  __ESIMD_DNS::is_device_accessor_with_v<
1179  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1180  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1182  FlagsT flags = FlagsT{}) {
1183 #ifdef __ESIMD_FORCE_STATELESS_MEM
1184  lsc_prefetch<T, NElts, DS, L1H, L2H>(
1185  __ESIMD_DNS::accessorToPointer<T>(acc, offset), flags);
1186 #else
1187  __ESIMD_NS::simd_mask<1> Mask = 1;
1188  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1189  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1190  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offset, Mask);
1191 #endif
1192 }
1193 
1208 template <typename T, int NElts = 1,
1209  lsc_data_size DS = lsc_data_size::default_size, int N>
1210 __ESIMD_API void lsc_slm_scatter(__ESIMD_NS::simd<uint32_t, N> offsets,
1211  __ESIMD_NS::simd<T, N * NElts> vals,
1212  __ESIMD_NS::simd_mask<N> pred = 1) {
1213  __ESIMD_DNS::slm_scatter_impl<T, NElts, DS>(offsets, vals, pred);
1214 }
1215 
1228 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1229  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1230 __ESIMD_API void lsc_slm_block_store(uint32_t offset,
1231  __ESIMD_NS::simd<T, NElts> vals,
1232  FlagsT flags = FlagsT{}) {
1233  // Make sure we generate an LSC block store
1234  __ESIMD_NS::properties Props{__ESIMD_NS::alignment<
1235  FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
1236  __ESIMD_NS::simd_mask<1> pred = 1;
1237  __ESIMD_NS::slm_block_store<T, NElts>(offset, vals, pred, Props);
1238 }
1239 
1257 template <typename T, int NElts = 1,
1258  lsc_data_size DS = lsc_data_size::default_size,
1260  int N, typename Toffset>
1261 __ESIMD_API void lsc_scatter(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1262  __ESIMD_NS::simd<T, N * NElts> vals,
1263  __ESIMD_NS::simd_mask<N> pred = 1) {
1264  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1265  __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT, N, Toffset>(
1266  p, offsets, vals, pred);
1267 }
1268 
1269 template <typename T, int NElts = 1,
1270  lsc_data_size DS = lsc_data_size::default_size,
1272  int N, typename OffsetObjT, typename RegionTy>
1273 __ESIMD_API void
1274 lsc_scatter(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1275  __ESIMD_NS::simd<T, N * NElts> vals,
1276  __ESIMD_NS::simd_mask<N> pred = 1) {
1277  lsc_scatter<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), vals, pred);
1278 }
1279 
1280 template <typename T, int NElts = 1,
1281  lsc_data_size DS = lsc_data_size::default_size,
1283  int N, typename Toffset>
1284 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1>
1285 lsc_scatter(T *p, Toffset offset, __ESIMD_NS::simd<T, N * NElts> vals,
1286  __ESIMD_NS::simd_mask<N> pred = 1) {
1287  lsc_scatter<T, NElts, DS, L1H, L2H, N>(
1288  p, __ESIMD_NS::simd<Toffset, N>(offset), vals, pred);
1289 }
1290 
1309 template <typename T, int NElts = 1,
1310  lsc_data_size DS = lsc_data_size::default_size,
1312  int N, typename AccessorTy>
1313 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1314  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1315 lsc_scatter(AccessorTy acc,
1316  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1317  __ESIMD_NS::simd<T, N * NElts> vals,
1318  __ESIMD_NS::simd_mask<N> pred = 1) {
1319 #ifdef __ESIMD_FORCE_STATELESS_MEM
1320  lsc_scatter<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1321  offsets, vals, pred);
1322 #else
1323  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1324  __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT>(acc, offsets, vals,
1325  pred);
1326 #endif
1327 }
1328 
1329 #ifdef __ESIMD_FORCE_STATELESS_MEM
1330 template <typename T, int NElts = 1,
1331  lsc_data_size DS = lsc_data_size::default_size,
1333  int N, typename AccessorTy, typename Toffset>
1334 __ESIMD_API std::enable_if_t<
1335  __ESIMD_DNS::is_device_accessor_with_v<
1336  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1337  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1338 lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1339  __ESIMD_NS::simd<T, N * NElts> vals,
1340  __ESIMD_NS::simd_mask<N> pred = 1) {
1341  lsc_scatter<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1342  acc, convert<uint64_t>(offsets), vals, pred);
1343 }
1344 #endif
1345 
1346 template <typename T, int NElts = 1,
1347  lsc_data_size DS = lsc_data_size::default_size,
1349  int N, typename AccessorTy>
1350 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
1351  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1352 lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
1353  __ESIMD_NS::simd<T, N * NElts> vals,
1354  __ESIMD_NS::simd_mask<N> pred = 1) {
1355  lsc_slm_scatter<T, NElts, DS>(
1356  offsets + __ESIMD_DNS::localAccessorToOffset(acc), vals, pred);
1357 }
1358 
1391 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1393  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1394 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1395 lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals,
1396  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1397  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1398  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1399  return __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(p, vals, pred);
1400 }
1401 
1430 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1432  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1433 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1434 lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
1435  lsc_block_store<T, NElts, DS, L1H, L2H>(p, vals, __ESIMD_NS::simd_mask<1>(1),
1436  flags);
1437 }
1438 
1473 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1475  typename AccessorTy,
1476  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1477 __ESIMD_API std::enable_if_t<
1478  __ESIMD_DNS::is_device_accessor_with_v<
1479  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1480  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1482  __ESIMD_NS::simd<T, NElts> vals,
1483  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1484  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1485  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1486  __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(acc, offset, vals,
1487  pred);
1488 }
1489 
1490 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1492  typename AccessorTy,
1493  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1494 __ESIMD_API std::enable_if_t<
1495  __ESIMD_DNS::is_local_accessor_with_v<
1496  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1497  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1498 lsc_block_store(AccessorTy acc, uint32_t offset,
1499  __ESIMD_NS::simd<T, NElts> vals, FlagsT flags = FlagsT{}) {
1500  lsc_slm_block_store<T, NElts, DS>(
1501  offset + __ESIMD_DNS::localAccessorToOffset(acc), vals, flags);
1502 }
1503 
1534 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1536  typename AccessorTy,
1537  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1538 __ESIMD_API std::enable_if_t<
1539  __ESIMD_DNS::is_accessor_with_v<
1540  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1541  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1543  __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
1544  lsc_block_store<T, NElts, DS, L1H, L2H>(acc, offset, vals,
1545  __ESIMD_NS::simd_mask<1>(1), flags);
1546 }
1547 
1578 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1579  bool Transposed = false, bool Transformed = false,
1582  T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1583 __ESIMD_API __ESIMD_NS::simd<T, N>
1584 lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
1585  unsigned SurfacePitch, int X, int Y) {
1586  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1587  return __ESIMD_DNS::load_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1588  Transposed, Transformed, PropertyListT>(
1589  Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
1590 }
1591 
1614 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1617  T, NBlocks, BlockHeight, BlockWidth, false, false>()>
1618 __ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth,
1619  unsigned SurfaceHeight, unsigned SurfacePitch,
1620  int X, int Y) {
1621  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1622  __ESIMD_DNS::prefetch_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1623  PropertyListT>(Ptr, SurfaceWidth, SurfaceHeight,
1624  SurfacePitch, X, Y);
1625 }
1626 
1651 template <typename T, int BlockWidth, int BlockHeight = 1,
1654  T, 1u, BlockHeight, BlockWidth, false, false>()>
1655 __ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth,
1656  unsigned SurfaceHeight, unsigned SurfacePitch,
1657  int X, int Y, __ESIMD_NS::simd<T, N> Vals) {
1658  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1659  __ESIMD_DNS::store_2d_impl<T, BlockWidth, BlockHeight, PropertyListT>(
1660  Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y, Vals);
1661 }
1662 
1670 template <typename T, int BlockWidth, int BlockHeight, int NBlocks>
1672 public:
1676  config_2d_mem_access() : payload_data(0) {
1677  payload_data.template select<1, 1>(7) =
1678  ((NBlocks - 1) << 16) | ((BlockHeight - 1) << 8) | (BlockWidth - 1);
1679  }
1680 
1685  : payload_data(other.payload_data) {}
1686 
1698  config_2d_mem_access(const T *Ptr, uint32_t SurfaceWidth,
1699  uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X,
1700  int32_t Y)
1701  : config_2d_mem_access() {
1702  payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
1703  (uint64_t)Ptr;
1704  payload_data.template select<1, 1>(2) = SurfaceWidth;
1705  payload_data.template select<1, 1>(3) = SurfaceHeight;
1706  payload_data.template select<1, 1>(4) = SurfacePitch;
1707  payload_data.template select<1, 1>(5) = X;
1708  payload_data.template select<1, 1>(6) = Y;
1709  }
1710 
1715  T *get_data_pointer() const {
1716  return (T *)((
1717  uint64_t)(const_cast<config_2d_mem_access *>(this)
1718  ->payload_data.template bit_cast_view<uint64_t>()[0]));
1719  }
1720 
1725  uint32_t get_surface_width() const {
1726  return const_cast<config_2d_mem_access *>(this)
1727  ->payload_data.template select<1, 1>(2);
1728  }
1729 
1734  uint32_t get_surface_height() const {
1735  return const_cast<config_2d_mem_access *>(this)
1736  ->payload_data.template select<1, 1>(3);
1737  }
1738 
1743  uint32_t get_surface_pitch() const {
1744  return const_cast<config_2d_mem_access *>(this)
1745  ->payload_data.template select<1, 1>(4);
1746  }
1747 
1752  int32_t get_x() const {
1753  return const_cast<config_2d_mem_access *>(this)
1754  ->payload_data.template select<1, 1>(5);
1755  }
1756 
1761  int32_t get_y() const {
1762  return const_cast<config_2d_mem_access *>(this)
1763  ->payload_data.template select<1, 1>(6);
1764  }
1765 
1770  constexpr int32_t get_width() const { return BlockWidth; }
1771 
1776  constexpr int32_t get_height() const { return BlockHeight; }
1777 
1782  constexpr int32_t get_number_of_blocks() const { return NBlocks; }
1783 
1790  payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
1791  (uint64_t)Ptr;
1792  return *this;
1793  }
1794 
1800  config_2d_mem_access &set_surface_width(uint32_t SurfaceWidth) {
1801  payload_data.template select<1, 1>(2) = SurfaceWidth;
1802  return *this;
1803  }
1804 
1810  config_2d_mem_access &set_surface_height(uint32_t SurfaceHeight) {
1811  payload_data.template select<1, 1>(3) = SurfaceHeight;
1812  return *this;
1813  }
1814 
1820  config_2d_mem_access &set_surface_pitch(uint32_t SurfacePitch) {
1821  payload_data.template select<1, 1>(4) = SurfacePitch;
1822  return *this;
1823  }
1824 
1831  payload_data.template select<1, 1>(5) = X;
1832  return *this;
1833  }
1834 
1841  payload_data.template select<1, 1>(6) = Y;
1842  return *this;
1843  }
1844 
1845 private:
1846  __ESIMD_NS::simd<uint32_t, 16> get_raw_data() { return payload_data; }
1847  __ESIMD_NS::simd<uint32_t, 16> payload_data;
1848 
1849  template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
1850  bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L2H,
1851  int N>
1852  friend ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T1, N> lsc_load_2d(
1854 
1855  template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
1856  cache_hint L1H, cache_hint L2H, int N>
1857  friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_store_2d(
1859  __ESIMD_NS::simd<T1, N> Data);
1860 
1861  template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
1862  bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L2H,
1863  int N>
1864  friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
1866 };
1867 
1887 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1888  bool Transposed = false, bool Transformed = false,
1891  T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1892 ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
1894  __ESIMD_DNS::check_lsc_block_2d_restrictions<
1895  T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1896  __ESIMD_DNS::block_2d_op::load>();
1897  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1898  __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
1899  PropertyListT>();
1900  constexpr int ElemsPerDword = 4 / sizeof(T);
1901  constexpr int GRFRowSize = Transposed ? BlockHeight
1902  : Transformed ? BlockWidth * ElemsPerDword
1903  : BlockWidth;
1904  constexpr int GRFRowPitch = __ESIMD_DNS::getNextPowerOf2<GRFRowSize>();
1905  constexpr int GRFColSize =
1906  Transposed
1907  ? BlockWidth
1908  : (Transformed ? (BlockHeight + ElemsPerDword - 1) / ElemsPerDword
1909  : BlockHeight);
1910  constexpr int GRFBlockSize = GRFRowPitch * GRFColSize;
1911  constexpr int GRFBlockPitch =
1912  __ESIMD_DNS::roundUpNextMultiple<64 / sizeof(T), GRFBlockSize>();
1913  constexpr int ActualN = NBlocks * GRFBlockPitch;
1914 
1915  constexpr int DstBlockElements = GRFColSize * GRFRowSize;
1916  constexpr int DstElements = DstBlockElements * NBlocks;
1917 
1918  constexpr uint32_t GrfBytes = 64;
1919  constexpr uint32_t DstBlockSize =
1920  __ESIMD_DNS::roundUpNextMultiple<DstElements * sizeof(T), GrfBytes>();
1921  constexpr uint32_t DstLength =
1922  (DstBlockSize / GrfBytes) > 31 ? 31 : (DstBlockSize / GrfBytes);
1923  constexpr uint32_t DstLengthMask = DstLength << 20;
1924 
1925  static_assert(N == ActualN || N == DstElements, "Incorrect element count");
1926 
1927  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
1928  << 17;
1929  constexpr uint32_t base_desc = 0x2000003;
1930  constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
1931  constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
1932  constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
1933  __ESIMD_NS::simd<T, N> oldDst;
1934  constexpr uint32_t exDesc = 0x0;
1935  constexpr uint32_t desc = base_desc | cache_mask | transformMask |
1936  transposeMask | dataSizeMask | DstLengthMask;
1937  constexpr uint8_t execSize = 1;
1938  constexpr uint8_t sfid = 0xF;
1939  constexpr uint8_t numSrc0 = 0x1;
1940  constexpr uint8_t numDst = (N * sizeof(T)) / 64;
1941  __ESIMD_NS::simd<T, ActualN> Raw =
1942  __ESIMD_NS::raw_send<execSize, sfid, numSrc0, numDst>(
1943  oldDst, payload.get_raw_data(), exDesc, desc);
1944 
1945  if constexpr (ActualN == N) {
1946  return Raw;
1947  } else {
1948  // HW restrictions force data which is read to contain padding filled with
1949  // zeros for 2d lsc loads. This code eliminates such padding.
1950 
1951  __ESIMD_NS::simd<T, DstElements> Dst;
1952 
1953  for (auto i = 0; i < NBlocks; i++) {
1954  auto DstBlock =
1955  Dst.template select<DstBlockElements, 1>(i * DstBlockElements);
1956 
1957  auto RawBlock = Raw.template select<GRFBlockSize, 1>(i * GRFBlockPitch);
1958  DstBlock = RawBlock.template bit_cast_view<T, GRFColSize, GRFRowPitch>()
1959  .template select<GRFColSize, 1, GRFRowSize, 1>(0, 0)
1960  .template bit_cast_view<T>();
1961  }
1962 
1963  return Dst;
1964  }
1965 }
1966 
1983 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1984  bool Transposed = false, bool Transformed = false,
1987  T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1988 ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
1990  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1991  __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
1992  PropertyListT>();
1993  __ESIMD_DNS::check_lsc_block_2d_restrictions<
1994  T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1996  static_assert(!Transposed || !Transformed,
1997  "Transposed and transformed is not supported");
1998  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
1999  << 17;
2000  constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2001  constexpr uint32_t base_desc = 0x2000003;
2002  constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
2003  constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
2004  constexpr uint32_t exDesc = 0x0;
2005  constexpr uint32_t desc =
2006  base_desc | cache_mask | transformMask | transposeMask | dataSizeMask;
2007  constexpr uint8_t execSize = 1;
2008  constexpr uint8_t sfid = 0xF;
2009  constexpr uint8_t numDst = (N * sizeof(T)) / 64;
2010  __ESIMD_NS::raw_send<execSize, sfid, numDst>(payload.get_raw_data(), exDesc,
2011  desc);
2012 }
2013 
2029 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
2032  T, NBlocks, BlockHeight, BlockWidth, false, false>()>
2033 ESIMD_INLINE SYCL_ESIMD_FUNCTION void
2035  __ESIMD_NS::simd<T, N> Data) {
2036  __ESIMD_DNS::check_lsc_block_2d_restrictions<
2037  T, BlockWidth, BlockHeight, NBlocks, false, false,
2038  __ESIMD_DNS::block_2d_op::store>();
2039  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2040  __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::store,
2041  PropertyListT>();
2042 
2043  constexpr uint32_t cache_mask = detail::get_lsc_store_cache_mask<L1H, L2H>()
2044  << 17;
2045  constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2046  constexpr uint32_t base_desc = 0x2000007;
2047 
2048  constexpr uint32_t exDesc = 0x0;
2049  constexpr uint32_t desc = base_desc | cache_mask | dataSizeMask;
2050  constexpr uint8_t execSize = 1;
2051  constexpr uint8_t sfid = 0xF;
2052  constexpr uint8_t numSrc0 = 0x1;
2053  constexpr uint8_t numSrc1 = (N * sizeof(T)) / 64;
2054 
2055  __ESIMD_NS::raw_sends<execSize, sfid, numSrc0, numSrc1>(
2056  payload.get_raw_data(), Data, exDesc, desc);
2057 }
2058 
2059 namespace detail {
2060 
2061 // lsc_atomic_update() operations may share atomic_op values for data types
2062 // of the same (fp vs integral) class for convenience (e.g. re-use 'fmax' for
2063 // all FP types). In fact those data types may require using different internal
2064 // opcodes. This function returns the corresponding internal opcode for
2065 // the input type 'T' and operation 'Op'.
2066 template <typename T, __ESIMD_NS::atomic_op Op>
2067 constexpr int lsc_to_internal_atomic_op() {
2068  constexpr __ESIMD_NS::native::lsc::atomic_op LSCOp =
2069  __ESIMD_DNS::to_lsc_atomic_op<Op>();
2070  return static_cast<int>(LSCOp);
2071 }
2072 } // namespace detail
2073 
2087 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2088  lsc_data_size DS = lsc_data_size::default_size>
2089 __ESIMD_API __ESIMD_NS::simd<T, N>
2090 lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
2091  __ESIMD_NS::simd_mask<N> pred) {
2092  return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, pred);
2093 }
2094 
2109 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2110  lsc_data_size DS = lsc_data_size::default_size>
2111 __ESIMD_API __ESIMD_NS::simd<T, N>
2112 lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
2113  __ESIMD_NS::simd<T, N> src0,
2114  __ESIMD_NS::simd_mask<N> pred) {
2115  return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, src0, pred);
2116 }
2117 
2133 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2134  lsc_data_size DS = lsc_data_size::default_size>
2135 __ESIMD_API __ESIMD_NS::simd<T, N>
2136 lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
2137  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2138  __ESIMD_NS::simd_mask<N> pred) {
2139  return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, src0, src1,
2140  pred);
2141 }
2142 
2157 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2158  lsc_data_size DS = lsc_data_size::default_size,
2160  typename Toffset>
2161 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0,
2162  __ESIMD_NS::simd<T, N>>
2163 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
2164  __ESIMD_NS::simd_mask<N> pred) {
2165  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2166  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2167  p, offsets, pred);
2168 }
2169 
2170 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2171  lsc_data_size DS = lsc_data_size::default_size,
2173  typename Toffset>
2174 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2175  __ESIMD_DNS::get_num_args<Op>() == 0,
2176  __ESIMD_NS::simd<T, N>>
2177 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
2178  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2179  p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
2180 }
2181 
2197 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2198  lsc_data_size DS = lsc_data_size::default_size,
2200  typename Toffset>
2201 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2202  __ESIMD_NS::simd<T, N>>
2203 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
2204  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
2205  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2206  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2207  p, offsets, src0, pred);
2208 }
2209 
2210 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2211  lsc_data_size DS = lsc_data_size::default_size,
2213  typename OffsetObjT, typename RegionTy>
2214 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2215  __ESIMD_NS::simd<T, N>>
2216 lsc_atomic_update(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
2217  __ESIMD_NS::simd<T, N> src0,
2218  __ESIMD_NS::simd_mask<N> pred = 1) {
2219  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(), src0,
2220  pred);
2221 }
2222 
2223 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2224  lsc_data_size DS = lsc_data_size::default_size,
2226  typename Toffset>
2227 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2228  __ESIMD_DNS::get_num_args<Op>() == 1 &&
2229  ((Op != __ESIMD_NS::atomic_op::store &&
2230  Op != __ESIMD_NS::atomic_op::xchg) ||
2231  N == 1),
2232  __ESIMD_NS::simd<T, N>>
2233 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
2234  __ESIMD_NS::simd_mask<N> pred = 1) {
2235  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2236  p, __ESIMD_NS::simd<Toffset, N>(offset), src0, pred);
2237 }
2238 
2255 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2256  lsc_data_size DS = lsc_data_size::default_size,
2258  typename Toffset>
2259 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2260  __ESIMD_NS::simd<T, N>>
2261 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
2262  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2263  __ESIMD_NS::simd_mask<N> pred) {
2264  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2265  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2266  p, offsets, src0, src1, pred);
2267 }
2268 
2269 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2270  lsc_data_size DS = lsc_data_size::default_size,
2272  typename OffsetObjT, typename RegionTy>
2273 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2274  __ESIMD_NS::simd<T, N>>
2275 lsc_atomic_update(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
2276  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2277  __ESIMD_NS::simd_mask<N> pred = 1) {
2278  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(), src0,
2279  src1, pred);
2280 }
2281 
2282 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2283  lsc_data_size DS = lsc_data_size::default_size,
2285  typename Toffset>
2286 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2287  __ESIMD_DNS::get_num_args<Op>() == 2,
2288  __ESIMD_NS::simd<T, N>>
2289 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
2290  __ESIMD_NS::simd<T, N> src1,
2291  __ESIMD_NS::simd_mask<N> pred = 1) {
2292  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2293  p, __ESIMD_NS::simd<Toffset, N>(offset), src0, src1, pred);
2294 }
2295 
2313 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2314  lsc_data_size DS = lsc_data_size::default_size,
2316  typename AccessorTy, typename Toffset>
2317 __ESIMD_API std::enable_if_t<
2318  __ESIMD_DNS::is_device_accessor_with_v<
2319  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
2320  (Op == __ESIMD_NS::atomic_op::load ||
2321  __ESIMD_DNS::is_device_accessor_with_v<
2322  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>),
2323  __ESIMD_NS::simd<T, N>>
2324 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
2325  __ESIMD_NS::simd_mask<N> pred) {
2326  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2327  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2328  acc, offsets, pred);
2329 }
2330 
2346 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2347  lsc_data_size DS = lsc_data_size::default_size,
2349  typename AccessorTy>
2350 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2351  __ESIMD_NS::simd<T, N>>
2352 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
2353  __ESIMD_NS::simd_mask<N> pred) {
2354  return lsc_slm_atomic_update<Op, T, N, DS>(
2355  offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
2356 }
2357 
2376 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2377  lsc_data_size DS = lsc_data_size::default_size,
2379  typename AccessorTy, typename Toffset>
2380 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2381  __ESIMD_NS::simd<T, N>>
2382 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
2383  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
2384  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2385  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2386  acc, offsets, src0, pred);
2387 }
2388 
2405 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2406  lsc_data_size DS = lsc_data_size::default_size,
2408  typename AccessorTy>
2409 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2410  __ESIMD_NS::simd<T, N>>
2411 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
2412  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
2413  return lsc_slm_atomic_update<Op, T, N, DS>(
2414  offsets + __ESIMD_DNS::localAccessorToOffset(acc), src0, pred);
2415 }
2416 
2436 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2437  lsc_data_size DS = lsc_data_size::default_size,
2439  typename AccessorTy, typename Toffset>
2440 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2441  __ESIMD_NS::simd<T, N>>
2442 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
2443  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2444  __ESIMD_NS::simd_mask<N> pred) {
2445  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2446  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2447  acc, offsets, src0, src1, pred);
2448 }
2449 
2467 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2468  lsc_data_size DS = lsc_data_size::default_size,
2470  typename AccessorTy>
2471 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2472  __ESIMD_NS::simd<T, N>>
2473 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
2474  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2475  __ESIMD_NS::simd_mask<N> pred) {
2476  return lsc_slm_atomic_update<Op, T, N, DS>(
2477  offsets + __ESIMD_DNS::localAccessorToOffset(acc), src0, src1, pred);
2478 }
2479 
2481 
2484 
2487 
2489 __ESIMD_API int32_t get_hw_thread_id() {
2490 #ifdef __SYCL_DEVICE_ONLY__
2491  return __spirv_BuiltInGlobalHWThreadIDINTEL();
2492 #else
2493  return std::rand();
2494 #endif // __SYCL_DEVICE_ONLY__
2495 }
2497 __ESIMD_API int32_t get_subdevice_id() {
2498 #ifdef __SYCL_DEVICE_ONLY__
2499  return __spirv_BuiltInSubDeviceIDINTEL();
2500 #else
2501  return 0;
2502 #endif
2503 }
2504 
2506 
2511 template <uint8_t NbarCount> __ESIMD_API uint8_t named_barrier_allocate() {
2512  return __esimd_named_barrier_allocate(NbarCount);
2513 }
2514 
2515 } // namespace experimental::esimd
2516 
2517 namespace esimd {
2518 
2522 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2523 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2524  __ESIMD_DNS::get_num_args<Op>() == 0,
2525  simd<T, N>>
2527  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2528  p, offset, mask);
2529 }
2530 
2531 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2532  typename RegionTy>
2533 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0, simd<T, N>>
2535  simd_mask<N> mask = 1) {
2536  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2537  p, offsets, mask);
2538 }
2539 
2540 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2541 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2542  __ESIMD_DNS::get_num_args<Op>() == 0,
2543  simd<T, N>>
2544 atomic_update(T *p, Toffset offset, simd_mask<N> mask = 1) {
2545  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2546  p, offset, mask);
2547 }
2548 
2550 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2551 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2552  __ESIMD_DNS::get_num_args<Op>() == 1,
2553  simd<T, N>>
2555  simd_mask<N> mask) {
2556  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2557  p, offset, src0, mask);
2558 }
2559 
2560 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2561  typename RegionTy>
2562 __ESIMD_API __ESIMD_API
2563  std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1, simd<T, N>>
2565  simd<T, N> src0, simd_mask<N> mask = 1) {
2566  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2567  p, offsets, src0, mask);
2568 }
2569 
2570 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2571 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2572  __ESIMD_DNS::get_num_args<Op>() == 1,
2573  simd<T, N>>
2574 atomic_update(T *p, Toffset offset, simd<T, N> src0, simd_mask<N> mask = 1) {
2575  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2576  p, offset, src0, mask);
2577 }
2578 
2580 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2581 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2582  __ESIMD_DNS::get_num_args<Op>() == 2,
2583  simd<T, N>>
2585  simd_mask<N> mask) {
2586  // 2-argument lsc_atomic_update arguments order matches the standard one -
2587  // expected value first, then new value. But atomic_update uses reverse
2588  // order, hence the src1/src0 swap.
2589  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2590  p, offset, src1, src0, mask);
2591 }
2592 
2593 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2594  typename RegionTy>
2595 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2, simd<T, N>>
2597  simd<T, N> src1, simd_mask<N> mask = 1) {
2598  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2599  p, offsets, src1, src0, mask);
2600 }
2601 
2602 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2603 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2604  __ESIMD_DNS::get_num_args<Op>() == 2,
2605  __ESIMD_NS::simd<T, N>>
2607  simd_mask<N> mask = 1) {
2608  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2609  p, offset, src1, src0, mask);
2610 }
2611 
2612 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2613  typename AccessorTy>
2614 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2615  __ESIMD_DNS::get_num_args<Op>() == 0 &&
2616  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2617  simd<T, N>>
2618 atomic_update(AccessorTy acc, simd<Toffset, N> offset, simd_mask<N> mask) {
2619  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2620  acc, offset, mask);
2621 }
2622 
2623 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2624  typename RegionTy, typename AccessorTy>
2625 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0 &&
2626  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2627  simd<T, N>>
2629  simd_mask<N> mask) {
2630  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2631  acc, offsets, mask);
2632 }
2633 
2634 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2635  typename AccessorTy>
2636 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2637  __ESIMD_DNS::get_num_args<Op>() == 0 &&
2638  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2639  simd<T, N>>
2640 atomic_update(AccessorTy acc, Toffset offset, simd_mask<N> mask) {
2641  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2642  acc, offset, mask);
2643 }
2644 
2646 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2647  typename AccessorTy>
2648 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2649  __ESIMD_DNS::get_num_args<Op>() == 1 &&
2650  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2651  simd<T, N>>
2653  simd_mask<N> mask) {
2654  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2655  acc, offset, src0, mask);
2656 }
2657 
2658 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2659  typename RegionTy, typename AccessorTy>
2660 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
2661  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2662  simd<T, N>>
2664  simd<T, N> src0, simd_mask<N> mask) {
2665  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2666  acc, offsets, src0, mask);
2667 }
2668 
2669 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2670  typename AccessorTy>
2671 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2672  __ESIMD_DNS::get_num_args<Op>() == 1 &&
2673  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2674  simd<T, N>>
2675 atomic_update(AccessorTy acc, Toffset offset, simd<T, N> src0,
2676  simd_mask<N> mask) {
2677  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2678  acc, offset, src0, mask);
2679 }
2680 
2682 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2683  typename AccessorTy>
2684 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2685  __ESIMD_DNS::get_num_args<Op>() == 2 &&
2686  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2687  simd<T, N>>
2689  simd<T, N> src1, simd_mask<N> mask) {
2690  // 2-argument lsc_atomic_update arguments order matches the standard one -
2691  // expected value first, then new value. But atomic_update uses reverse
2692  // order, hence the src1/src0 swap.
2693  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2694  acc, offset, src1, src0, mask);
2695 }
2696 
2697 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2698  typename RegionTy, typename AccessorTy>
2699 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
2700  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2701  simd<T, N>>
2704  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2705  acc, offsets, src1, src0, mask);
2706 }
2707 
2708 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2709  typename AccessorTy>
2710 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2711  __ESIMD_DNS::get_num_args<Op>() == 2 &&
2712  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2713  __ESIMD_NS::simd<T, N>>
2714 atomic_update(AccessorTy acc, Toffset offset, simd<T, N> src0, simd<T, N> src1,
2715  simd_mask<N> mask) {
2716  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2717  acc, offset, src1, src0, mask);
2718 }
2719 
2720 } // namespace esimd
2721 } // namespace ext::intel
2722 } // namespace _V1
2723 } // namespace sycl
Definition: simd.hpp:1387
This class represents a reference to a sub-region of a base simd object.
Definition: simd_view.hpp:37
The main simd vector class.
Definition: simd.hpp:53
Container class to hold parameters for load2d/store2d functions
Definition: memory.hpp:1671
T * get_data_pointer() const
Get a surface base address
Definition: memory.hpp:1715
config_2d_mem_access & set_x(int32_t X)
Sets top left corner X coordinate of the block
Definition: memory.hpp:1830
constexpr int32_t get_number_of_blocks() const
Get number of blocks
Definition: memory.hpp:1782
constexpr int32_t get_width() const
Get width of the block
Definition: memory.hpp:1770
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_store_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload, sycl::ext::intel::esimd::simd< T1, N > Data)
config_2d_mem_access & set_surface_width(uint32_t SurfaceWidth)
Sets surface width
Definition: memory.hpp:1800
int32_t get_x() const
Get top left corner X coordinate of the block
Definition: memory.hpp:1752
config_2d_mem_access & set_data_pointer(T *Ptr)
Sets surface base address
Definition: memory.hpp:1789
config_2d_mem_access & set_surface_height(uint32_t SurfaceHeight)
Sets surface height
Definition: memory.hpp:1810
config_2d_mem_access(const config_2d_mem_access &other)
Copy constructor
Definition: memory.hpp:1684
config_2d_mem_access & set_y(int32_t Y)
Sets top left corner Y coordinate of the block
Definition: memory.hpp:1840
config_2d_mem_access(const T *Ptr, uint32_t SurfaceWidth, uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X, int32_t Y)
Constructor
Definition: memory.hpp:1698
config_2d_mem_access & set_surface_pitch(uint32_t SurfacePitch)
Sets surface pitch
Definition: memory.hpp:1820
constexpr int32_t get_height() const
Get height of the block
Definition: memory.hpp:1776
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd< T1, N > lsc_load_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
int32_t get_y() const
Get top left corner Y coordinate of the block
Definition: memory.hpp:1761
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
__ESIMD_DNS::lsc_data_size lsc_data_size
Definition: common.hpp:28
sycl::ext::intel::esimd::cache_hint cache_hint
L1 or L2 cache hint kinds.
Definition: common.hpp:72
atomic_op
Represents an atomic operation.
Definition: common.hpp:160
__ESIMD_API int32_t get_subdevice_id()
Get subdevice ID.
Definition: memory.hpp:2497
__ESIMD_API int32_t get_hw_thread_id()
Get HW Thread ID.
Definition: memory.hpp:2489
__ESIMD_API SZ simd< T, SZ > src1
Definition: math.hpp:184
__ESIMD_API SZ src0
Definition: math.hpp:184
__ESIMD_API void lsc_slm_block_store(uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{})
Transposed SLM scatter with 1 channel.
Definition: memory.hpp:1230
__ESIMD_API void lsc_prefetch(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer prefetch gather.
Definition: memory.hpp:1031
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, sycl::ext::intel::esimd::simd< T, N > > lsc_atomic_update(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
USM pointer atomic.
Definition: memory.hpp:2163
__ESIMD_API void lsc_slm_scatter(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM scatter.
Definition: memory.hpp:1210
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block load.
Definition: memory.hpp:1584
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_slm_atomic_update(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
SLM atomic.
Definition: memory.hpp:2090
__ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, sycl::ext::intel::esimd::simd< T, N > Vals)
2D USM pointer block store.
Definition: memory.hpp:1655
atomic_op
LSC atomic operation codes.
Definition: common.hpp:39
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > lsc_slm_block_load(uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
Transposed SLM gather with 1 channel.
Definition: memory.hpp:403
__ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block prefetch.
Definition: memory.hpp:1618
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_slm_gather(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM gather.
Definition: memory.hpp:353
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > lsc_block_load(const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed gather with 1 channel.
Definition: memory.hpp:734
__ESIMD_API void lsc_scatter(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer scatter.
Definition: memory.hpp:1261
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > lsc_block_store(T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed scatter with 1 channel.
Definition: memory.hpp:1395
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_gather(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer gather.
Definition: memory.hpp:460
__ESIMD_API std::enable_if_t<(sizeof(T) *N >=2)> wait(sycl::ext::intel::esimd::simd< T, N > value)
Create explicit scoreboard dependency to avoid device code motion across this call and preserve the v...
Definition: memory.hpp:213
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > atomic_update(AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); ...
Definition: memory.hpp:8020
__ESIMD_API void split_barrier()
Generic work-group split barrier.
Definition: memory.hpp:29
__ESIMD_API std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > prefetch(AccessorT acc, PropertyListT props={})
template <typename T, int VS = 1, typename AccessorT, typename PropertyListT = empty_properties_t> vo...
Definition: memory.hpp:13888
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_send(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw send.
Definition: memory.hpp:110
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_sends(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, sycl::ext::intel::esimd::simd< T3, n3 > msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw sends.
Definition: memory.hpp:65
typename make_L1_L2_alignment_properties< L1H, L2H, Alignment >::type make_L1_L2_alignment_properties_t
constexpr alignment_key::value_t< K > alignment
cache_hint
L1, L2 or L3 cache hints.
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, N > lsc_format_input(sycl::ext::intel::esimd::simd< T, N > Vals)
Definition: memory.hpp:253
ESIMD_INLINE sycl::ext::intel::esimd::simd< T, N > lsc_format_ret(sycl::ext::intel::esimd::simd< T1, N > Vals)
Definition: memory.hpp:260
__ESIMD_API uint8_t named_barrier_allocate()
Allocate additional named barriers for a kernel Available only on PVC.
Definition: memory.hpp:2511
Definition: access.hpp:18
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.
Definition: math.hpp:436