DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory.hpp
Go to the documentation of this file.
1 //==-------------- memory.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement experimental Explicit SIMD memory-access APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
18 
19 namespace sycl {
20 inline namespace _V1 {
21 namespace ext::intel {
22 namespace experimental::esimd {
23 
26 
29 template <split_barrier_action flag> __ESIMD_API void split_barrier() {
30  __esimd_sbarrier(flag);
31 }
32 
33 __SYCL_DEPRECATED("use split_barrier<split_barrier_action>()")
34 __ESIMD_API void split_barrier(split_barrier_action flag) {
35  __esimd_sbarrier(flag);
36 }
37 
39 
42 
67 template <typename T1, int n1, typename T2, int n2, typename T3, int n3,
68  int N = 16>
69 __ESIMD_API __ESIMD_NS::simd<T1, n1>
70 raw_sends(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
71  __ESIMD_NS::simd<T3, n3> msgSrc1, uint32_t exDesc, uint32_t msgDesc,
72  uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
73  uint8_t numDst, uint8_t isEOT = 0, uint8_t isSendc = 0,
74  __ESIMD_NS::simd_mask<N> mask = 1) {
75  constexpr unsigned _Width1 = n1 * sizeof(T1);
76  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
77  constexpr unsigned _Width2 = n2 * sizeof(T2);
78  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
79  constexpr unsigned _Width3 = n3 * sizeof(T3);
80  static_assert(_Width3 % 32 == 0, "Invalid size for raw send msgSrc1");
81 
82  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
83  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
84  using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
85 
86  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
87  return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, N>(
88  modifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid, exDesc,
89  msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
90 }
91 
111 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
112  uint8_t numDst, uint8_t isEOT = 0, uint8_t isSendc = 0, typename T1,
113  int n1, typename T2, int n2, typename T3, int n3>
114 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::raw_sends")
115 __ESIMD_API __ESIMD_NS::simd<T1, n1> raw_sends(
116  __ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
117  __ESIMD_NS::simd<T3, n3> msgSrc1, uint32_t exDesc, uint32_t msgDesc,
118  __ESIMD_NS::simd_mask<execSize> mask = 1) {
119  constexpr unsigned _Width1 = n1 * sizeof(T1);
120  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
121  constexpr unsigned _Width2 = n2 * sizeof(T2);
122  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
123  constexpr unsigned _Width3 = n3 * sizeof(T3);
124  static_assert(_Width3 % 32 == 0, "Invalid size for raw send msgSrc1");
125 
126  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
127  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
128  using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
129 
130  constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
131 
132  return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, execSize>(
133  modifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid, exDesc,
134  msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
135 }
136 
158 template <typename T1, int n1, typename T2, int n2, int N = 16>
159 __ESIMD_API __ESIMD_NS::simd<T1, n1>
160 raw_send(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
161  uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
162  uint8_t numSrc0, uint8_t numDst, uint8_t isEOT = 0,
163  uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
164  constexpr unsigned _Width1 = n1 * sizeof(T1);
165  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
166  constexpr unsigned _Width2 = n2 * sizeof(T2);
167  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
168 
169  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
170  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
171 
172  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
173  return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, N>(
174  modifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc, msgDesc,
175  msgSrc0.data(), msgDst.data());
176 }
177 
195 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst,
196  uint8_t isEOT = 0, uint8_t isSendc = 0, typename T1, int n1,
197  typename T2, int n2>
198 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::raw_send")
199 __ESIMD_API __ESIMD_NS::simd<T1, n1> raw_send(
200  __ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
201  uint32_t exDesc, uint32_t msgDesc,
202  __ESIMD_NS::simd_mask<execSize> mask = 1) {
203  constexpr unsigned _Width1 = n1 * sizeof(T1);
204  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
205  constexpr unsigned _Width2 = n2 * sizeof(T2);
206  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
207 
208  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
209  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
210 
211  constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
212  return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, execSize>(
213  modifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc, msgDesc,
214  msgSrc0.data(), msgDst.data());
215 }
216 
237 template <typename T1, int n1, typename T2, int n2, int N = 16>
238 __ESIMD_API void
239 raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0, __ESIMD_NS::simd<T2, n2> msgSrc1,
240  uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
241  uint8_t numSrc0, uint8_t numSrc1, uint8_t isEOT = 0,
242  uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
243  constexpr unsigned _Width1 = n1 * sizeof(T1);
244  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
245  constexpr unsigned _Width2 = n2 * sizeof(T2);
246  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc1");
247 
248  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
249  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
250 
251  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
252  __esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, N>(
253  modifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc, msgDesc,
254  msgSrc0.data(), msgSrc1.data());
255 }
256 
273 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
274  uint8_t isEOT = 0, uint8_t isSendc = 0, typename T1, int n1,
275  typename T2, int n2>
276 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::raw_sends")
277 __ESIMD_API
278  void raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0,
279  __ESIMD_NS::simd<T2, n2> msgSrc1, uint32_t exDesc,
280  uint32_t msgDesc, __ESIMD_NS::simd_mask<execSize> mask = 1) {
281  constexpr unsigned _Width1 = n1 * sizeof(T1);
282  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
283  constexpr unsigned _Width2 = n2 * sizeof(T2);
284  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc1");
285 
286  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
287  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
288 
289  constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
290  __esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, execSize>(
291  modifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc, msgDesc,
292  msgSrc0.data(), msgSrc1.data());
293 }
294 
313 template <typename T1, int n1, int N = 16>
314 __ESIMD_API void
315 raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
316  uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0,
317  uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
318  constexpr unsigned _Width1 = n1 * sizeof(T1);
319  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
320  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
321  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
322  __esimd_raw_send2_noresult<ElemT1, n1, N>(modifier, execSize, mask.data(),
323  numSrc0, sfid, exDesc, msgDesc,
324  msgSrc0.data());
325 }
326 
342 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0,
343  uint8_t isSendc = 0, typename T1, int n1>
344 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::raw_send")
345 __ESIMD_API
346  void raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc,
347  uint32_t msgDesc, __ESIMD_NS::simd_mask<execSize> mask = 1) {
348  constexpr unsigned _Width1 = n1 * sizeof(T1);
349  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
350  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
351  constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
352  __esimd_raw_send2_noresult<ElemT1, n1, execSize>(
353  modifier, execSize, mask.data(), numSrc0, sfid, exDesc, msgDesc,
354  msgSrc0.data());
355 }
356 
358 
361 
364 
369 __ESIMD_API void named_barrier_wait(uint8_t id) {
370  __esimd_nbarrier(0 /*wait*/, id, 0 /*thread count*/);
371 }
372 
377 template <uint8_t NbarCount> __ESIMD_API void named_barrier_init() {
378  __esimd_nbarrier_init(NbarCount);
379 }
380 
393 __ESIMD_API void named_barrier_signal(uint8_t barrier_id,
394  uint8_t producer_consumer_mode,
395  uint32_t num_producers,
396  uint32_t num_consumers) {
399 #ifdef __ESIMD_USE_NEW_NAMED_BARRIER_INTRIN
400  __esimd_nbarrier_arrive(barrier_id, producer_consumer_mode, num_producers,
401  num_consumers);
402 #else
403  constexpr uint32_t gateway = 3;
404  constexpr uint32_t barrier = 4;
405  constexpr uint32_t descriptor = 1 << 25 | // Message length: 1 register
406  0 << 12 | // Fence Data Ports: No fence
407  barrier; // Barrier subfunction
408 
409  __ESIMD_DNS::vector_type_t<uint32_t, 8> payload = 0;
410  payload[2] = (num_consumers & 0xff) << 24 | (num_producers & 0xff) << 16 |
411  producer_consumer_mode << 14 | (barrier_id & 0b11111) << 0;
412  __esimd_raw_send_nbarrier_signal<uint32_t, 8>(
413  0 /*sendc*/, gateway, descriptor, payload, 1 /*pred*/);
414 #endif
415 }
416 
420 template <typename T, int N>
421 __ESIMD_API std::enable_if_t<(sizeof(T) * N >= 2)>
422 wait(__ESIMD_NS::simd<T, N> value) {
423 #ifdef __SYCL_DEVICE_ONLY__
424  uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
425  __esimd_wait(Word);
426 #endif // __SYCL_DEVICE_ONLY__
427 }
428 
432 template <typename T, typename RegionT>
433 __ESIMD_API std::enable_if_t<
434  (RegionT::length * sizeof(typename RegionT::element_type) >= 2)>
435 wait(__ESIMD_NS::simd_view<T, RegionT> value) {
436 #ifdef __SYCL_DEVICE_ONLY__
437  uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
438  __esimd_wait(Word);
439 #endif // __SYCL_DEVICE_ONLY__
440 }
441 
443 
446 
449 
450 namespace detail {
451 // Compute the data size for 2d block load or store.
452 template <typename T, int NBlocks, int Height, int Width, bool Transposed,
453  bool Transformed>
454 constexpr int get_lsc_block_2d_data_size() {
455  return __ESIMD_DNS::get_lsc_block_2d_data_size<T, NBlocks, Height, Width,
456  Transposed, Transformed>();
457 }
458 
459 // Format u8 and u16 to u8u32 and u16u32 by doing garbage-extension.
460 template <typename RT, typename T, int N>
461 ESIMD_INLINE __ESIMD_NS::simd<RT, N>
462 lsc_format_input(__ESIMD_NS::simd<T, N> Vals) {
463  return __ESIMD_DNS::lsc_format_input<RT, T, N>(Vals);
464 }
465 
466 // Format u8u32 and u16u32 back to u8 and u16.
467 template <typename T, typename T1, int N>
468 ESIMD_INLINE __ESIMD_NS::simd<T, N>
469 lsc_format_ret(__ESIMD_NS::simd<T1, N> Vals) {
470  return __ESIMD_DNS::lsc_format_ret<T, T1, N>(Vals);
471 }
472 
473 template <typename T> constexpr uint32_t get_lsc_data_size() {
474  switch (sizeof(T)) {
475  case 1:
476  return 0;
477  case 2:
478  return 1;
479  case 4:
480  return 2;
481  case 8:
482  return 3;
483  default:
484  static_assert(true, "Unsupported data type.");
485  }
486 }
487 
488 template <cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
489 constexpr uint32_t get_lsc_load_cache_mask() {
490  if constexpr (L1H == cache_hint::read_invalidate &&
491  L2H == cache_hint::cached) {
492  return 7;
493  }
494  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
495  return 6;
496  }
497  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
498  return 5;
499  }
500  if constexpr (L1H == cache_hint::cached && L2H == cache_hint::cached) {
501  return 4;
502  }
503  if constexpr (L1H == cache_hint::cached && L2H == cache_hint::uncached) {
504  return 3;
505  }
506  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
507  return 2;
508  }
509  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
510  return 1;
511  }
512  return 0;
513 }
514 
515 template <cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
516 constexpr uint32_t get_lsc_store_cache_mask() {
517  if constexpr (L1H == cache_hint::write_back && L2H == cache_hint::cached) {
518  return 7;
519  }
520  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
521  return 6;
522  }
523  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
524  return 5;
525  }
526  if constexpr (L1H == cache_hint::write_through && L2H == cache_hint::cached) {
527  return 4;
528  }
529  if constexpr (L1H == cache_hint::write_through &&
530  L2H == cache_hint::uncached) {
531  return 3;
532  }
533  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
534  return 2;
535  }
536  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
537  return 1;
538  }
539  return 0;
540 }
541 
542 } // namespace detail
543 
559 template <typename T, int NElts = 1,
560  lsc_data_size DS = lsc_data_size::default_size, int N>
561 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
562 lsc_slm_gather(__ESIMD_NS::simd<uint32_t, N> offsets,
563  __ESIMD_NS::simd_mask<N> pred = 1) {
564  __ESIMD_NS::simd<T, N * NElts> pass_thru;
565  return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
566 }
567 
585 template <typename T, int NElts = 1,
586  lsc_data_size DS = lsc_data_size::default_size, int N>
587 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
588 lsc_slm_gather(__ESIMD_NS::simd<uint32_t, N> offsets,
589  __ESIMD_NS::simd_mask<N> pred,
590  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
591  return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
592 }
593 
609 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
610  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
611 __ESIMD_API __ESIMD_NS::simd<T, NElts>
612 lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred = 1,
613  FlagsT flags = FlagsT{}) {
614  __ESIMD_NS::properties Props{__ESIMD_NS::alignment<
615  FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
616  return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, Props);
617 }
618 
636 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
637  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
638 __ESIMD_API __ESIMD_NS::simd<T, NElts>
639 lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
640  __ESIMD_NS::simd<T, NElts> pass_thru) {
641  __ESIMD_NS::properties Props{__ESIMD_NS::alignment<
642  FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
643  return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, pass_thru, Props);
644 }
645 
664 template <typename T, int NElts = 1,
665  lsc_data_size DS = lsc_data_size::default_size,
667  int N, typename Toffset>
668 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
669 lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
670  __ESIMD_NS::simd_mask<N> pred = 1) {
671  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
672  __ESIMD_NS::simd<T, N * NElts> PassThru; // Intentionally undefined.
673  return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
674  PassThru);
675 }
676 
697 template <typename T, int NElts = 1,
698  lsc_data_size DS = lsc_data_size::default_size,
700  int N, typename Toffset>
701 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
702 lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
703  __ESIMD_NS::simd_mask<N> pred,
704  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
705  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
706  return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
707  pass_thru);
708 }
709 
710 template <typename T, int NElts = 1,
711  lsc_data_size DS = lsc_data_size::default_size,
713  int N, typename OffsetObjT, typename RegionTy>
714 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
715 lsc_gather(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
716  __ESIMD_NS::simd_mask<N> pred = 1) {
717  return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
718 }
719 
720 template <typename T, int NElts = 1,
721  lsc_data_size DS = lsc_data_size::default_size,
723  int N, typename OffsetObjT, typename RegionTy>
724 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
725 lsc_gather(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
726  __ESIMD_NS::simd_mask<N> pred,
727  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
728  return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred,
729  pass_thru);
730 }
731 
732 template <typename T, int NElts = 1,
733  lsc_data_size DS = lsc_data_size::default_size,
735  int N, typename Toffset>
736 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
737  __ESIMD_NS::simd<T, N * NElts>>
738 lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
739  return lsc_gather<T, NElts, DS, L1H, L2H, N>(
740  p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
741 }
742 
743 template <typename T, int NElts = 1,
744  lsc_data_size DS = lsc_data_size::default_size,
746  int N, typename Toffset>
747 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
748  __ESIMD_NS::simd<T, N * NElts>>
749 lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred,
750  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
751  return lsc_gather<T, NElts, DS, L1H, L2H, N>(
752  p, __ESIMD_NS::simd<Toffset, N>(offset), pred, pass_thru);
753 }
754 
774 template <typename T, int NElts = 1,
775  lsc_data_size DS = lsc_data_size::default_size,
777  int N, typename AccessorTy>
778 __ESIMD_API
779  std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
780  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
781  __ESIMD_NS::simd<T, N * NElts>>
782  lsc_gather(AccessorTy acc,
783  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
784  __ESIMD_NS::simd_mask<N> pred = 1) {
785 #ifdef __ESIMD_FORCE_STATELESS_MEM
786  return lsc_gather<T, NElts, DS, L1H, L2H>(
787  __ESIMD_DNS::accessorToPointer<T>(acc), offsets, pred);
788 #else
789  __ESIMD_NS::simd<T, N * NElts> PassThru; // Intentionally uninitialized.
790  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
791  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
792  acc, offsets, pred, PassThru);
793 #endif // __ESIMD_FORCE_STATELESS_MEM
794 }
795 
796 #ifdef __ESIMD_FORCE_STATELESS_MEM
797 template <typename T, int NElts = 1,
798  lsc_data_size DS = lsc_data_size::default_size,
800  int N, typename AccessorTy, typename Toffset>
801 __ESIMD_API std::enable_if_t<
802  __ESIMD_DNS::is_device_accessor_with_v<
803  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
804  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
805  __ESIMD_NS::simd<T, N * NElts>>
806 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
807  __ESIMD_NS::simd_mask<N> pred = 1) {
808  return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
809  acc, convert<uint64_t>(offsets), pred);
810 }
811 #endif
812 
813 template <typename T, int NElts = 1,
814  lsc_data_size DS = lsc_data_size::default_size,
816  int N, typename AccessorTy>
817 __ESIMD_API
818  std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
819  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
820  __ESIMD_NS::simd<T, N * NElts>>
821  lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
822  __ESIMD_NS::simd_mask<N> pred = 1) {
823  return lsc_slm_gather<T, NElts, DS>(
824  offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
825 }
826 
848 template <typename T, int NElts = 1,
849  lsc_data_size DS = lsc_data_size::default_size,
851  int N, typename AccessorTy>
852 __ESIMD_API
853  std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
854  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
855  __ESIMD_NS::simd<T, N * NElts>>
856  lsc_gather(AccessorTy acc,
857  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
858  __ESIMD_NS::simd_mask<N> pred,
859  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
860 #ifdef __ESIMD_FORCE_STATELESS_MEM
861  return lsc_gather<T, NElts, DS, L1H, L2H>(
862  reinterpret_cast<T *>(__ESIMD_DNS::accessorToPointer<T>(acc)), offsets,
863  pred, pass_thru);
864 
865 #else
866  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
867  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
868  acc, offsets, pred, pass_thru);
869 #endif // __ESIMD_FORCE_STATELESS_MEM
870 }
871 
872 #ifdef __ESIMD_FORCE_STATELESS_MEM
873 template <typename T, int NElts = 1,
874  lsc_data_size DS = lsc_data_size::default_size,
876  int N, typename AccessorTy, typename Toffset>
877 __ESIMD_API std::enable_if_t<
878  __ESIMD_DNS::is_device_accessor_with_v<
879  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
880  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
881  __ESIMD_NS::simd<T, N * NElts>>
882 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
883  __ESIMD_NS::simd_mask<N> pred,
884  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
885  return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
886  acc, convert<uint64_t>(offsets), pred, pass_thru);
887 }
888 #endif
889 
890 template <typename T, int NElts = 1,
891  lsc_data_size DS = lsc_data_size::default_size,
893  int N, typename AccessorTy>
894 __ESIMD_API std::enable_if_t<
895  sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>,
896  __ESIMD_NS::simd<T, N * NElts>>
897 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
898  __ESIMD_NS::simd_mask<N> pred,
899  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
900  return lsc_slm_gather<T, NElts, DS>(
901  offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru);
902 }
903 
939 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
941  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
942 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
943  __ESIMD_NS::simd<T, NElts>>
944 lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
946  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
947  __ESIMD_NS::simd<T, NElts> PassThru; // Intentionally undefined.
948  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
949  PassThru);
950 }
951 
980 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
982  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
983 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
984  __ESIMD_NS::simd<T, NElts>>
985 lsc_block_load(const T *p, FlagsT) {
987  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
988  __ESIMD_NS::simd<T, NElts> PassThru; // Intentionally undefined.
989  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(
990  p, __ESIMD_NS::simd_mask<1>(1), PassThru);
991 }
992 
1024 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1026  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1027 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1028  __ESIMD_NS::simd<T, NElts>>
1029 lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred,
1030  __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
1031  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1032  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1033  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
1034  pass_thru);
1035 }
1036 
1068 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1070  typename AccessorTy,
1071  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1072 __ESIMD_API std::enable_if_t<
1073  __ESIMD_DNS::is_device_accessor_with_v<
1074  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1075  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1076  __ESIMD_NS::simd<T, NElts>>
1078  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
1079  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1080  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1081  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
1082  pred);
1083 }
1084 
1085 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1087  typename AccessorTy,
1088  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1089 __ESIMD_API std::enable_if_t<
1090  __ESIMD_DNS::is_local_accessor_with_v<
1091  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1092  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1093  __ESIMD_NS::simd<T, NElts>>
1094 lsc_block_load(AccessorTy acc, uint32_t offset,
1095  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
1096  return lsc_slm_block_load<T, NElts, DS>(
1097  offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, flags);
1098 }
1099 
1127 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1129  typename AccessorTy,
1130  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1131 __ESIMD_API std::enable_if_t<
1132  __ESIMD_DNS::is_device_accessor_with_v<
1133  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1134  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1135  __ESIMD_NS::simd<T, NElts>>
1137  FlagsT flags) {
1138  return lsc_block_load<T, NElts, DS, L1H, L2H>(
1139  acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
1140 }
1141 
1142 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1144  typename AccessorTy,
1145  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1146 __ESIMD_API std::enable_if_t<
1147  __ESIMD_DNS::is_local_accessor_with_v<
1148  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1149  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1150  __ESIMD_NS::simd<T, NElts>>
1151 lsc_block_load(AccessorTy acc, uint32_t offset, FlagsT flags) {
1152  return lsc_block_load<T, NElts, DS, L1H, L2H>(
1153  acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
1154 }
1155 
1188 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1190  typename AccessorTy,
1191  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1192 __ESIMD_API std::enable_if_t<
1193  __ESIMD_DNS::is_device_accessor_with_v<
1194  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1195  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1196  __ESIMD_NS::simd<T, NElts>>
1198  __ESIMD_NS::simd_mask<1> pred,
1199  __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
1200  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1201  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1202  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
1203  pred, pass_thru);
1204 }
1205 
1206 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1208  typename AccessorTy,
1209  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1210 __ESIMD_API std::enable_if_t<
1211  __ESIMD_DNS::is_local_accessor_with_v<
1212  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1213  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1214  __ESIMD_NS::simd<T, NElts>>
1215 lsc_block_load(AccessorTy acc, uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
1216  __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT flags = FlagsT{}) {
1217  return lsc_slm_block_load<T, NElts, DS>(
1218  offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru, flags);
1219 }
1220 
1237 template <typename T, int NElts = 1,
1238  lsc_data_size DS = lsc_data_size::default_size,
1240  int N, typename Toffset>
1241 __ESIMD_API void lsc_prefetch(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1242  __ESIMD_NS::simd_mask<N> pred = 1) {
1243  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1244  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, offsets, pred);
1245 }
1246 
1247 template <typename T, int NElts = 1,
1248  lsc_data_size DS = lsc_data_size::default_size,
1250  int N, typename OffsetObjT, typename RegionTy>
1251 __ESIMD_API void
1252 lsc_prefetch(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1253  __ESIMD_NS::simd_mask<N> pred = 1) {
1254  lsc_prefetch<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
1255 }
1256 
1257 template <typename T, int NElts = 1,
1258  lsc_data_size DS = lsc_data_size::default_size,
1260  int N, typename Toffset>
1261 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>>
1262 lsc_prefetch(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
1263  lsc_prefetch<T, NElts, DS, L1H, L2H, N>(
1264  p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
1265 }
1266 
1292 template <typename T, int NElts = 1,
1293  lsc_data_size DS = lsc_data_size::default_size,
1295  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1296 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1297 lsc_prefetch(const T *p, FlagsT = {}) {
1298  __ESIMD_NS::simd_mask<1> Mask = 1;
1299  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1300  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1301  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, 0, Mask);
1302 }
1303 
1321 template <typename T, int NElts = 1,
1322  lsc_data_size DS = lsc_data_size::default_size,
1324  int N, typename AccessorTy>
1325 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1326  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
1327 lsc_prefetch(AccessorTy acc,
1328  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1329  __ESIMD_NS::simd_mask<N> pred = 1) {
1330 #ifdef __ESIMD_FORCE_STATELESS_MEM
1331  lsc_prefetch<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1332  offsets, pred);
1333 #else
1334  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1335  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offsets, pred);
1336 #endif
1337 }
1338 
1339 #ifdef __ESIMD_FORCE_STATELESS_MEM
1340 template <typename T, int NElts = 1,
1341  lsc_data_size DS = lsc_data_size::default_size,
1343  int N, typename AccessorTy, typename Toffset>
1344 __ESIMD_API std::enable_if_t<
1345  __ESIMD_DNS::is_device_accessor_with_v<
1346  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1347  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1348 lsc_prefetch(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1349  __ESIMD_NS::simd_mask<N> pred = 1) {
1350  lsc_prefetch<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1351  acc, convert<uint64_t>(offsets), pred);
1352 }
1353 #endif
1354 
1382 template <typename T, int NElts = 1,
1383  lsc_data_size DS = lsc_data_size::default_size,
1385  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag,
1386  typename AccessorTy>
1387 __ESIMD_API std::enable_if_t<
1388  __ESIMD_DNS::is_device_accessor_with_v<
1389  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1390  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1392  FlagsT flags = FlagsT{}) {
1393 #ifdef __ESIMD_FORCE_STATELESS_MEM
1394  lsc_prefetch<T, NElts, DS, L1H, L2H>(
1395  __ESIMD_DNS::accessorToPointer<T>(acc, offset), flags);
1396 #else
1397  __ESIMD_NS::simd_mask<1> Mask = 1;
1398  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1399  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1400  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offset, Mask);
1401 #endif
1402 }
1403 
1418 template <typename T, int NElts = 1,
1419  lsc_data_size DS = lsc_data_size::default_size, int N>
1420 __ESIMD_API void lsc_slm_scatter(__ESIMD_NS::simd<uint32_t, N> offsets,
1421  __ESIMD_NS::simd<T, N * NElts> vals,
1422  __ESIMD_NS::simd_mask<N> pred = 1) {
1423  __ESIMD_DNS::slm_scatter_impl<T, NElts, DS>(offsets, vals, pred);
1424 }
1425 
1438 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1439  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1440 __ESIMD_API void lsc_slm_block_store(uint32_t offset,
1441  __ESIMD_NS::simd<T, NElts> vals,
1442  FlagsT flags = FlagsT{}) {
1443  // Make sure we generate an LSC block store
1444  __ESIMD_NS::properties Props{__ESIMD_NS::alignment<
1445  FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
1446  __ESIMD_NS::simd_mask<1> pred = 1;
1447  __ESIMD_NS::slm_block_store<T, NElts>(offset, vals, pred, Props);
1448 }
1449 
1467 template <typename T, int NElts = 1,
1468  lsc_data_size DS = lsc_data_size::default_size,
1470  int N, typename Toffset>
1471 __ESIMD_API void lsc_scatter(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1472  __ESIMD_NS::simd<T, N * NElts> vals,
1473  __ESIMD_NS::simd_mask<N> pred = 1) {
1474  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1475  __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT, N, Toffset>(
1476  p, offsets, vals, pred);
1477 }
1478 
1479 template <typename T, int NElts = 1,
1480  lsc_data_size DS = lsc_data_size::default_size,
1482  int N, typename OffsetObjT, typename RegionTy>
1483 __ESIMD_API void
1484 lsc_scatter(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1485  __ESIMD_NS::simd<T, N * NElts> vals,
1486  __ESIMD_NS::simd_mask<N> pred = 1) {
1487  lsc_scatter<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), vals, pred);
1488 }
1489 
1490 template <typename T, int NElts = 1,
1491  lsc_data_size DS = lsc_data_size::default_size,
1493  int N, typename Toffset>
1494 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1>
1495 lsc_scatter(T *p, Toffset offset, __ESIMD_NS::simd<T, N * NElts> vals,
1496  __ESIMD_NS::simd_mask<N> pred = 1) {
1497  lsc_scatter<T, NElts, DS, L1H, L2H, N>(
1498  p, __ESIMD_NS::simd<Toffset, N>(offset), vals, pred);
1499 }
1500 
1519 template <typename T, int NElts = 1,
1520  lsc_data_size DS = lsc_data_size::default_size,
1522  int N, typename AccessorTy>
1523 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1524  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1525 lsc_scatter(AccessorTy acc,
1526  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1527  __ESIMD_NS::simd<T, N * NElts> vals,
1528  __ESIMD_NS::simd_mask<N> pred = 1) {
1529 #ifdef __ESIMD_FORCE_STATELESS_MEM
1530  lsc_scatter<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1531  offsets, vals, pred);
1532 #else
1533  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1534  __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT>(acc, offsets, vals,
1535  pred);
1536 #endif
1537 }
1538 
1539 #ifdef __ESIMD_FORCE_STATELESS_MEM
1540 template <typename T, int NElts = 1,
1541  lsc_data_size DS = lsc_data_size::default_size,
1543  int N, typename AccessorTy, typename Toffset>
1544 __ESIMD_API std::enable_if_t<
1545  __ESIMD_DNS::is_device_accessor_with_v<
1546  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1547  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1548 lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1549  __ESIMD_NS::simd<T, N * NElts> vals,
1550  __ESIMD_NS::simd_mask<N> pred = 1) {
1551  lsc_scatter<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1552  acc, convert<uint64_t>(offsets), vals, pred);
1553 }
1554 #endif
1555 
1556 template <typename T, int NElts = 1,
1557  lsc_data_size DS = lsc_data_size::default_size,
1559  int N, typename AccessorTy>
1560 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
1561  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1562 lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
1563  __ESIMD_NS::simd<T, N * NElts> vals,
1564  __ESIMD_NS::simd_mask<N> pred = 1) {
1565  lsc_slm_scatter<T, NElts, DS>(
1566  offsets + __ESIMD_DNS::localAccessorToOffset(acc), vals, pred);
1567 }
1568 
1601 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1603  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1604 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1605 lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals,
1606  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1607  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1608  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1609  return __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(p, vals, pred);
1610 }
1611 
1640 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1642  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1643 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1644 lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
1645  lsc_block_store<T, NElts, DS, L1H, L2H>(p, vals, __ESIMD_NS::simd_mask<1>(1),
1646  flags);
1647 }
1648 
1683 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1685  typename AccessorTy,
1686  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1687 __ESIMD_API std::enable_if_t<
1688  __ESIMD_DNS::is_device_accessor_with_v<
1689  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1690  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1692  __ESIMD_NS::simd<T, NElts> vals,
1693  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1694  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1695  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1696  __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(acc, offset, vals,
1697  pred);
1698 }
1699 
1700 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1702  typename AccessorTy,
1703  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1704 __ESIMD_API std::enable_if_t<
1705  __ESIMD_DNS::is_local_accessor_with_v<
1706  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1707  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1708 lsc_block_store(AccessorTy acc, uint32_t offset,
1709  __ESIMD_NS::simd<T, NElts> vals, FlagsT flags = FlagsT{}) {
1710  lsc_slm_block_store<T, NElts, DS>(
1711  offset + __ESIMD_DNS::localAccessorToOffset(acc), vals, flags);
1712 }
1713 
1744 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1746  typename AccessorTy,
1747  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1748 __ESIMD_API std::enable_if_t<
1749  __ESIMD_DNS::is_accessor_with_v<
1750  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1751  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1753  __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
1754  lsc_block_store<T, NElts, DS, L1H, L2H>(acc, offset, vals,
1755  __ESIMD_NS::simd_mask<1>(1), flags);
1756 }
1757 
1788 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1789  bool Transposed = false, bool Transformed = false,
1792  T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1793 __ESIMD_API __ESIMD_NS::simd<T, N>
1794 lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
1795  unsigned SurfacePitch, int X, int Y) {
1796  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1797  return __ESIMD_DNS::load_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1798  Transposed, Transformed, PropertyListT>(
1799  Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
1800 }
1801 
1824 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1827  T, NBlocks, BlockHeight, BlockWidth, false, false>()>
1828 __ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth,
1829  unsigned SurfaceHeight, unsigned SurfacePitch,
1830  int X, int Y) {
1831  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1832  __ESIMD_DNS::prefetch_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1833  PropertyListT>(Ptr, SurfaceWidth, SurfaceHeight,
1834  SurfacePitch, X, Y);
1835 }
1836 
1861 template <typename T, int BlockWidth, int BlockHeight = 1,
1864  T, 1u, BlockHeight, BlockWidth, false, false>()>
1865 __ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth,
1866  unsigned SurfaceHeight, unsigned SurfacePitch,
1867  int X, int Y, __ESIMD_NS::simd<T, N> Vals) {
1868  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1869  __ESIMD_DNS::store_2d_impl<T, BlockWidth, BlockHeight, PropertyListT>(
1870  Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y, Vals);
1871 }
1872 
1880 template <typename T, int BlockWidth, int BlockHeight, int NBlocks>
1882 public:
1886  config_2d_mem_access() : payload_data(0) {
1887  payload_data.template select<1, 1>(7) =
1888  ((NBlocks - 1) << 16) | ((BlockHeight - 1) << 8) | (BlockWidth - 1);
1889  }
1890 
1895  : payload_data(other.payload) {}
1896 
1908  config_2d_mem_access(const T *Ptr, uint32_t SurfaceWidth,
1909  uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X,
1910  int32_t Y)
1911  : config_2d_mem_access() {
1912  payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
1913  (uint64_t)Ptr;
1914  payload_data.template select<1, 1>(2) = SurfaceWidth;
1915  payload_data.template select<1, 1>(3) = SurfaceHeight;
1916  payload_data.template select<1, 1>(4) = SurfacePitch;
1917  payload_data.template select<1, 1>(5) = X;
1918  payload_data.template select<1, 1>(6) = Y;
1919  }
1920 
1925  T *get_data_pointer() const {
1926  return (T *)((
1927  uint64_t)(const_cast<config_2d_mem_access *>(this)
1928  ->payload_data.template bit_cast_view<uint64_t>()[0]));
1929  }
1930 
1935  uint32_t get_surface_width() const {
1936  return const_cast<config_2d_mem_access *>(this)
1937  ->payload_data.template select<1, 1>(2);
1938  }
1939 
1944  uint32_t get_surface_height() const {
1945  return const_cast<config_2d_mem_access *>(this)
1946  ->payload_data.template select<1, 1>(3);
1947  }
1948 
1953  uint32_t get_surface_pitch() const {
1954  return const_cast<config_2d_mem_access *>(this)
1955  ->payload_data.template select<1, 1>(4);
1956  }
1957 
1962  int32_t get_x() const {
1963  return const_cast<config_2d_mem_access *>(this)
1964  ->payload_data.template select<1, 1>(5);
1965  }
1966 
1971  int32_t get_y() const {
1972  return const_cast<config_2d_mem_access *>(this)
1973  ->payload_data.template select<1, 1>(6);
1974  }
1975 
1980  constexpr int32_t get_width() const { return BlockWidth; }
1981 
1986  constexpr int32_t get_height() const { return BlockHeight; }
1987 
1992  constexpr int32_t get_number_of_blocks() const { return NBlocks; }
1993 
2000  payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
2001  (uint64_t)Ptr;
2002  return *this;
2003  }
2004 
2010  config_2d_mem_access &set_surface_width(uint32_t SurfaceWidth) {
2011  payload_data.template select<1, 1>(2) = SurfaceWidth;
2012  return *this;
2013  }
2014 
2020  config_2d_mem_access &set_surface_height(uint32_t SurfaceHeight) {
2021  payload_data.template select<1, 1>(3) = SurfaceHeight;
2022  return *this;
2023  }
2024 
2030  config_2d_mem_access &set_surface_pitch(uint32_t SurfacePitch) {
2031  payload_data.template select<1, 1>(4) = SurfacePitch;
2032  return *this;
2033  }
2034 
2041  payload_data.template select<1, 1>(5) = X;
2042  return *this;
2043  }
2044 
2051  payload_data.template select<1, 1>(6) = Y;
2052  return *this;
2053  }
2054 
2055 private:
2056  __ESIMD_NS::simd<uint32_t, 16> get_raw_data() { return payload_data; }
2057  __ESIMD_NS::simd<uint32_t, 16> payload_data;
2058 
2059  template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
2060  bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L2H,
2061  int N>
2062  friend ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T1, N> lsc_load_2d(
2064 
2065  template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
2066  cache_hint L1H, cache_hint L2H, int N>
2067  friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_store_2d(
2069  __ESIMD_NS::simd<T1, N> Data);
2070 
2071  template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
2072  bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L2H,
2073  int N>
2074  friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
2076 };
2077 
2097 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
2098  bool Transposed = false, bool Transformed = false,
2101  T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
2102 ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
2104  __ESIMD_DNS::check_lsc_block_2d_restrictions<
2105  T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
2106  __ESIMD_DNS::block_2d_op::load>();
2107  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2108  __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
2109  PropertyListT>();
2110  constexpr int ElemsPerDword = 4 / sizeof(T);
2111  constexpr int GRFRowSize = Transposed ? BlockHeight
2112  : Transformed ? BlockWidth * ElemsPerDword
2113  : BlockWidth;
2114  constexpr int GRFRowPitch = __ESIMD_DNS::getNextPowerOf2<GRFRowSize>();
2115  constexpr int GRFColSize =
2116  Transposed
2117  ? BlockWidth
2118  : (Transformed ? (BlockHeight + ElemsPerDword - 1) / ElemsPerDword
2119  : BlockHeight);
2120  constexpr int GRFBlockSize = GRFRowPitch * GRFColSize;
2121  constexpr int GRFBlockPitch =
2122  __ESIMD_DNS::roundUpNextMultiple<64 / sizeof(T), GRFBlockSize>();
2123  constexpr int ActualN = NBlocks * GRFBlockPitch;
2124 
2125  constexpr int DstBlockElements = GRFColSize * GRFRowSize;
2126  constexpr int DstElements = DstBlockElements * NBlocks;
2127 
2128  constexpr uint32_t GrfBytes = 64;
2129  constexpr uint32_t DstBlockSize =
2130  __ESIMD_DNS::roundUpNextMultiple<DstElements * sizeof(T), GrfBytes>();
2131  constexpr uint32_t DstLength =
2132  (DstBlockSize / GrfBytes) > 31 ? 31 : (DstBlockSize / GrfBytes);
2133  constexpr uint32_t DstLengthMask = DstLength << 20;
2134 
2135  static_assert(N == ActualN || N == DstElements, "Incorrect element count");
2136 
2137  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
2138  << 17;
2139  constexpr uint32_t base_desc = 0x2000003;
2140  constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
2141  constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
2142  constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2143  __ESIMD_NS::simd<T, N> oldDst;
2144  constexpr uint32_t exDesc = 0x0;
2145  constexpr uint32_t desc = base_desc | cache_mask | transformMask |
2146  transposeMask | dataSizeMask | DstLengthMask;
2147  constexpr uint8_t execSize = 1;
2148  constexpr uint8_t sfid = 0xF;
2149  constexpr uint8_t numSrc0 = 0x1;
2150  constexpr uint8_t numDst = (N * sizeof(T)) / 64;
2151  __ESIMD_NS::simd<T, ActualN> Raw =
2152  __ESIMD_NS::raw_send<execSize, sfid, numSrc0, numDst>(
2153  oldDst, payload.get_raw_data(), exDesc, desc);
2154 
2155  if constexpr (ActualN == N) {
2156  return Raw;
2157  } else {
2158  // HW restrictions force data which is read to contain padding filled with
2159  // zeros for 2d lsc loads. This code eliminates such padding.
2160 
2161  __ESIMD_NS::simd<T, DstElements> Dst;
2162 
2163  for (auto i = 0; i < NBlocks; i++) {
2164  auto DstBlock =
2165  Dst.template select<DstBlockElements, 1>(i * DstBlockElements);
2166 
2167  auto RawBlock = Raw.template select<GRFBlockSize, 1>(i * GRFBlockPitch);
2168  DstBlock = RawBlock.template bit_cast_view<T, GRFColSize, GRFRowPitch>()
2169  .template select<GRFColSize, 1, GRFRowSize, 1>(0, 0)
2170  .template bit_cast_view<T>();
2171  }
2172 
2173  return Dst;
2174  }
2175 }
2176 
2193 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
2194  bool Transposed = false, bool Transformed = false,
2197  T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
2198 ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
2200  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2201  __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
2202  PropertyListT>();
2203  __ESIMD_DNS::check_lsc_block_2d_restrictions<
2204  T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
2206  static_assert(!Transposed || !Transformed,
2207  "Transposed and transformed is not supported");
2208  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
2209  << 17;
2210  constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2211  constexpr uint32_t base_desc = 0x2000003;
2212  constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
2213  constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
2214  constexpr uint32_t exDesc = 0x0;
2215  constexpr uint32_t desc =
2216  base_desc | cache_mask | transformMask | transposeMask | dataSizeMask;
2217  constexpr uint8_t execSize = 1;
2218  constexpr uint8_t sfid = 0xF;
2219  constexpr uint8_t numDst = (N * sizeof(T)) / 64;
2220  __ESIMD_NS::raw_send<execSize, sfid, numDst>(payload.get_raw_data(), exDesc,
2221  desc);
2222 }
2223 
2239 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
2242  T, NBlocks, BlockHeight, BlockWidth, false, false>()>
2243 ESIMD_INLINE SYCL_ESIMD_FUNCTION void
2245  __ESIMD_NS::simd<T, N> Data) {
2246  __ESIMD_DNS::check_lsc_block_2d_restrictions<
2247  T, BlockWidth, BlockHeight, NBlocks, false, false,
2248  __ESIMD_DNS::block_2d_op::store>();
2249  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2250  __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::store,
2251  PropertyListT>();
2252 
2253  constexpr uint32_t cache_mask = detail::get_lsc_store_cache_mask<L1H, L2H>()
2254  << 17;
2255  constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2256  constexpr uint32_t base_desc = 0x2000007;
2257 
2258  constexpr uint32_t exDesc = 0x0;
2259  constexpr uint32_t desc = base_desc | cache_mask | dataSizeMask;
2260  constexpr uint8_t execSize = 1;
2261  constexpr uint8_t sfid = 0xF;
2262  constexpr uint8_t numSrc0 = 0x1;
2263  constexpr uint8_t numSrc1 = (N * sizeof(T)) / 64;
2264 
2265  __ESIMD_NS::raw_sends<execSize, sfid, numSrc0, numSrc1>(
2266  payload.get_raw_data(), Data, exDesc, desc);
2267 }
2268 
2269 namespace detail {
2270 
2271 // lsc_atomic_update() operations may share atomic_op values for data types
2272 // of the same (fp vs integral) class for convenience (e.g. re-use 'fmax' for
2273 // all FP types). In fact those data types may require using different internal
2274 // opcodes. This function returns the corresponding internal opcode for
2275 // the input type 'T' and operation 'Op'.
2276 template <typename T, __ESIMD_NS::atomic_op Op>
2277 constexpr int lsc_to_internal_atomic_op() {
2278  constexpr __ESIMD_NS::native::lsc::atomic_op LSCOp =
2279  __ESIMD_DNS::to_lsc_atomic_op<Op>();
2280  return static_cast<int>(LSCOp);
2281 }
2282 } // namespace detail
2283 
2297 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2298  lsc_data_size DS = lsc_data_size::default_size>
2299 __ESIMD_API __ESIMD_NS::simd<T, N>
2300 lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
2301  __ESIMD_NS::simd_mask<N> pred) {
2302  return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, pred);
2303 }
2304 
2319 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2320  lsc_data_size DS = lsc_data_size::default_size>
2321 __ESIMD_API __ESIMD_NS::simd<T, N>
2322 lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
2323  __ESIMD_NS::simd<T, N> src0,
2324  __ESIMD_NS::simd_mask<N> pred) {
2325  return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, src0, pred);
2326 }
2327 
2343 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2344  lsc_data_size DS = lsc_data_size::default_size>
2345 __ESIMD_API __ESIMD_NS::simd<T, N>
2346 lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
2347  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2348  __ESIMD_NS::simd_mask<N> pred) {
2349  return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, src0, src1,
2350  pred);
2351 }
2352 
2367 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2368  lsc_data_size DS = lsc_data_size::default_size,
2370  typename Toffset>
2371 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0,
2372  __ESIMD_NS::simd<T, N>>
2373 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
2374  __ESIMD_NS::simd_mask<N> pred) {
2375  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2376  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2377  p, offsets, pred);
2378 }
2379 
2380 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2381  lsc_data_size DS = lsc_data_size::default_size,
2383  typename Toffset>
2384 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2385  __ESIMD_DNS::get_num_args<Op>() == 0,
2386  __ESIMD_NS::simd<T, N>>
2387 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
2388  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2389  p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
2390 }
2391 
2407 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2408  lsc_data_size DS = lsc_data_size::default_size,
2410  typename Toffset>
2411 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2412  __ESIMD_NS::simd<T, N>>
2413 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
2414  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
2415  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2416  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2417  p, offsets, src0, pred);
2418 }
2419 
2420 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2421  lsc_data_size DS = lsc_data_size::default_size,
2423  typename OffsetObjT, typename RegionTy>
2424 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2425  __ESIMD_NS::simd<T, N>>
2426 lsc_atomic_update(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
2427  __ESIMD_NS::simd<T, N> src0,
2428  __ESIMD_NS::simd_mask<N> pred = 1) {
2429  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(), src0,
2430  pred);
2431 }
2432 
2433 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2434  lsc_data_size DS = lsc_data_size::default_size,
2436  typename Toffset>
2437 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2438  __ESIMD_DNS::get_num_args<Op>() == 1 &&
2439  ((Op != __ESIMD_NS::atomic_op::store &&
2440  Op != __ESIMD_NS::atomic_op::xchg) ||
2441  N == 1),
2442  __ESIMD_NS::simd<T, N>>
2443 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
2444  __ESIMD_NS::simd_mask<N> pred = 1) {
2445  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2446  p, __ESIMD_NS::simd<Toffset, N>(offset), src0, pred);
2447 }
2448 
2465 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2466  lsc_data_size DS = lsc_data_size::default_size,
2468  typename Toffset>
2469 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2470  __ESIMD_NS::simd<T, N>>
2471 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
2472  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2473  __ESIMD_NS::simd_mask<N> pred) {
2474  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2475  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2476  p, offsets, src0, src1, pred);
2477 }
2478 
2479 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2480  lsc_data_size DS = lsc_data_size::default_size,
2482  typename OffsetObjT, typename RegionTy>
2483 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2484  __ESIMD_NS::simd<T, N>>
2485 lsc_atomic_update(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
2486  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2487  __ESIMD_NS::simd_mask<N> pred = 1) {
2488  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(), src0,
2489  src1, pred);
2490 }
2491 
2492 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2493  lsc_data_size DS = lsc_data_size::default_size,
2495  typename Toffset>
2496 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2497  __ESIMD_DNS::get_num_args<Op>() == 2,
2498  __ESIMD_NS::simd<T, N>>
2499 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
2500  __ESIMD_NS::simd<T, N> src1,
2501  __ESIMD_NS::simd_mask<N> pred = 1) {
2502  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2503  p, __ESIMD_NS::simd<Toffset, N>(offset), src0, src1, pred);
2504 }
2505 
2523 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2524  lsc_data_size DS = lsc_data_size::default_size,
2526  typename AccessorTy, typename Toffset>
2527 __ESIMD_API std::enable_if_t<
2528  __ESIMD_DNS::is_device_accessor_with_v<
2529  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
2530  (Op == __ESIMD_NS::atomic_op::load ||
2531  __ESIMD_DNS::is_device_accessor_with_v<
2532  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>),
2533  __ESIMD_NS::simd<T, N>>
2534 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
2535  __ESIMD_NS::simd_mask<N> pred) {
2536  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2537  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2538  acc, offsets, pred);
2539 }
2540 
2556 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2557  lsc_data_size DS = lsc_data_size::default_size,
2559  typename AccessorTy>
2560 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2561  __ESIMD_NS::simd<T, N>>
2562 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
2563  __ESIMD_NS::simd_mask<N> pred) {
2564  return lsc_slm_atomic_update<Op, T, N, DS>(
2565  offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
2566 }
2567 
2586 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2587  lsc_data_size DS = lsc_data_size::default_size,
2589  typename AccessorTy, typename Toffset>
2590 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2591  __ESIMD_NS::simd<T, N>>
2592 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
2593  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
2594  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2595  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2596  acc, offsets, src0, pred);
2597 }
2598 
2615 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2616  lsc_data_size DS = lsc_data_size::default_size,
2618  typename AccessorTy>
2619 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2620  __ESIMD_NS::simd<T, N>>
2621 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
2622  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
2623  return lsc_slm_atomic_update<Op, T, N, DS>(
2624  offsets + __ESIMD_DNS::localAccessorToOffset(acc), src0, pred);
2625 }
2626 
2646 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2647  lsc_data_size DS = lsc_data_size::default_size,
2649  typename AccessorTy, typename Toffset>
2650 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2651  __ESIMD_NS::simd<T, N>>
2652 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
2653  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2654  __ESIMD_NS::simd_mask<N> pred) {
2655  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2656  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2657  acc, offsets, src0, src1, pred);
2658 }
2659 
2677 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2678  lsc_data_size DS = lsc_data_size::default_size,
2680  typename AccessorTy>
2681 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2682  __ESIMD_NS::simd<T, N>>
2683 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
2684  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2685  __ESIMD_NS::simd_mask<N> pred) {
2686  return lsc_slm_atomic_update<Op, T, N, DS>(
2687  offsets + __ESIMD_DNS::localAccessorToOffset(acc), src0, src1, pred);
2688 }
2689 
2698 template <lsc_memory_kind Kind = lsc_memory_kind::untyped_global,
2699  lsc_fence_op FenceOp = lsc_fence_op::none,
2700  lsc_scope Scope = lsc_scope::group, int N = 16>
2701 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::fence<Kind, FenceOp, Scope>()")
2702 __ESIMD_API void lsc_fence(__ESIMD_NS::simd_mask<N> pred = 1) {
2703  static_assert(
2704  Kind != lsc_memory_kind::shared_local ||
2705  (FenceOp == lsc_fence_op::none && Scope == lsc_scope::group),
2706  "SLM fence must have 'none' lsc_fence_op and 'group' scope");
2707  static_assert(Kind != lsc_memory_kind::untyped_global_low_pri,
2708  "lsc_memory_kind::untyped_global_low_pri is not supported in HW"
2709  " and/or GPU drivers");
2710  __esimd_lsc_fence<static_cast<uint8_t>(Kind), static_cast<uint8_t>(FenceOp),
2711  static_cast<uint8_t>(Scope), N>(pred.data());
2712 }
2713 
2715 
2718 
2721 
2723 __ESIMD_API int32_t get_hw_thread_id() {
2724 #ifdef __SYCL_DEVICE_ONLY__
2725  return __spirv_BuiltInGlobalHWThreadIDINTEL();
2726 #else
2727  return std::rand();
2728 #endif // __SYCL_DEVICE_ONLY__
2729 }
2731 __ESIMD_API int32_t get_subdevice_id() {
2732 #ifdef __SYCL_DEVICE_ONLY__
2733  return __spirv_BuiltInSubDeviceIDINTEL();
2734 #else
2735  return 0;
2736 #endif
2737 }
2738 
2740 
2741 } // namespace experimental::esimd
2742 
2743 namespace esimd {
2744 
2748 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2749 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2750  __ESIMD_DNS::get_num_args<Op>() == 0,
2751  simd<T, N>>
2753  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2754  p, offset, mask);
2755 }
2756 
2757 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2758  typename RegionTy>
2759 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0, simd<T, N>>
2761  simd_mask<N> mask = 1) {
2762  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2763  p, offsets, mask);
2764 }
2765 
2766 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2767 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2768  __ESIMD_DNS::get_num_args<Op>() == 0,
2769  simd<T, N>>
2770 atomic_update(T *p, Toffset offset, simd_mask<N> mask = 1) {
2771  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2772  p, offset, mask);
2773 }
2774 
2776 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2777 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2778  __ESIMD_DNS::get_num_args<Op>() == 1,
2779  simd<T, N>>
2781  simd_mask<N> mask) {
2782  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2783  p, offset, src0, mask);
2784 }
2785 
2786 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2787  typename RegionTy>
2788 __ESIMD_API __ESIMD_API
2789  std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1, simd<T, N>>
2791  simd<T, N> src0, simd_mask<N> mask = 1) {
2792  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2793  p, offsets, src0, mask);
2794 }
2795 
2796 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2797 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2798  __ESIMD_DNS::get_num_args<Op>() == 1,
2799  simd<T, N>>
2800 atomic_update(T *p, Toffset offset, simd<T, N> src0, simd_mask<N> mask = 1) {
2801  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2802  p, offset, src0, mask);
2803 }
2804 
2806 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2807 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2808  __ESIMD_DNS::get_num_args<Op>() == 2,
2809  simd<T, N>>
2811  simd_mask<N> mask) {
2812  // 2-argument lsc_atomic_update arguments order matches the standard one -
2813  // expected value first, then new value. But atomic_update uses reverse
2814  // order, hence the src1/src0 swap.
2815  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2816  p, offset, src1, src0, mask);
2817 }
2818 
2819 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2820  typename RegionTy>
2821 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2, simd<T, N>>
2823  simd<T, N> src1, simd_mask<N> mask = 1) {
2824  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2825  p, offsets, src1, src0, mask);
2826 }
2827 
2828 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2829 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2830  __ESIMD_DNS::get_num_args<Op>() == 2,
2831  __ESIMD_NS::simd<T, N>>
2833  simd_mask<N> mask = 1) {
2834  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2835  p, offset, src1, src0, mask);
2836 }
2837 
2838 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2839  typename AccessorTy>
2840 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2841  __ESIMD_DNS::get_num_args<Op>() == 0 &&
2842  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2843  simd<T, N>>
2844 atomic_update(AccessorTy acc, simd<Toffset, N> offset, simd_mask<N> mask) {
2845  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2846  acc, offset, mask);
2847 }
2848 
2849 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2850  typename RegionTy, typename AccessorTy>
2851 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0 &&
2852  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2853  simd<T, N>>
2855  simd_mask<N> mask) {
2856  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2857  acc, offsets, mask);
2858 }
2859 
2860 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2861  typename AccessorTy>
2862 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2863  __ESIMD_DNS::get_num_args<Op>() == 0 &&
2864  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2865  simd<T, N>>
2866 atomic_update(AccessorTy acc, Toffset offset, simd_mask<N> mask) {
2867  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2868  acc, offset, mask);
2869 }
2870 
2872 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2873  typename AccessorTy>
2874 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2875  __ESIMD_DNS::get_num_args<Op>() == 1 &&
2876  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2877  simd<T, N>>
2879  simd_mask<N> mask) {
2880  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2881  acc, offset, src0, mask);
2882 }
2883 
2884 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2885  typename RegionTy, typename AccessorTy>
2886 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
2887  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2888  simd<T, N>>
2890  simd<T, N> src0, simd_mask<N> mask) {
2891  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2892  acc, offsets, src0, mask);
2893 }
2894 
2895 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2896  typename AccessorTy>
2897 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2898  __ESIMD_DNS::get_num_args<Op>() == 1 &&
2899  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2900  simd<T, N>>
2901 atomic_update(AccessorTy acc, Toffset offset, simd<T, N> src0,
2902  simd_mask<N> mask) {
2903  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2904  acc, offset, src0, mask);
2905 }
2906 
2908 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2909  typename AccessorTy>
2910 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2911  __ESIMD_DNS::get_num_args<Op>() == 2 &&
2912  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2913  simd<T, N>>
2915  simd<T, N> src1, simd_mask<N> mask) {
2916  // 2-argument lsc_atomic_update arguments order matches the standard one -
2917  // expected value first, then new value. But atomic_update uses reverse
2918  // order, hence the src1/src0 swap.
2919  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2920  acc, offset, src1, src0, mask);
2921 }
2922 
2923 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2924  typename RegionTy, typename AccessorTy>
2925 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
2926  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2927  simd<T, N>>
2930  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2931  acc, offsets, src1, src0, mask);
2932 }
2933 
2934 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2935  typename AccessorTy>
2936 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2937  __ESIMD_DNS::get_num_args<Op>() == 2 &&
2938  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2939  __ESIMD_NS::simd<T, N>>
2940 atomic_update(AccessorTy acc, Toffset offset, simd<T, N> src0, simd<T, N> src1,
2941  simd_mask<N> mask) {
2942  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2943  acc, offset, src1, src0, mask);
2944 }
2945 
2963 template <int SLMAmount> class slm_allocator {
2964  int offset;
2965 
2966 public:
2968  slm_allocator() { offset = __esimd_slm_alloc(SLMAmount); }
2969 
2971  ESIMD_INLINE int get_offset() const { return offset; }
2972 
2974  ~slm_allocator() { __esimd_slm_free(offset); }
2975 };
2976 
2977 } // namespace esimd
2978 } // namespace ext::intel
2979 } // namespace _V1
2980 } // namespace sycl
Definition: simd.hpp:1387
This class represents a reference to a sub-region of a base simd object.
Definition: simd_view.hpp:37
The main simd vector class.
Definition: simd.hpp:53
RAII-style class used to implement "semi-dynamic" SLM allocation.
Definition: memory.hpp:2963
~slm_allocator()
Releases the SLM chunk allocated in the constructor.
Definition: memory.hpp:2974
slm_allocator()
Allocates the amount of SLM which is class' template parameter.
Definition: memory.hpp:2968
ESIMD_INLINE int get_offset() const
Definition: memory.hpp:2971
Container class to hold parameters for load2d/store2d functions
Definition: memory.hpp:1881
T * get_data_pointer() const
Get a surface base address
Definition: memory.hpp:1925
config_2d_mem_access & set_x(int32_t X)
Sets top left corner X coordinate of the block
Definition: memory.hpp:2040
constexpr int32_t get_number_of_blocks() const
Get number of blocks
Definition: memory.hpp:1992
constexpr int32_t get_width() const
Get width of the block
Definition: memory.hpp:1980
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_store_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload, sycl::ext::intel::esimd::simd< T1, N > Data)
config_2d_mem_access & set_surface_width(uint32_t SurfaceWidth)
Sets surface width
Definition: memory.hpp:2010
int32_t get_x() const
Get top left corner X coordinate of the block
Definition: memory.hpp:1962
config_2d_mem_access & set_data_pointer(T *Ptr)
Sets surface base address
Definition: memory.hpp:1999
config_2d_mem_access & set_surface_height(uint32_t SurfaceHeight)
Sets surface height
Definition: memory.hpp:2020
config_2d_mem_access(const config_2d_mem_access &other)
Copy constructor
Definition: memory.hpp:1894
config_2d_mem_access & set_y(int32_t Y)
Sets top left corner Y coordinate of the block
Definition: memory.hpp:2050
config_2d_mem_access(const T *Ptr, uint32_t SurfaceWidth, uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X, int32_t Y)
Constructor
Definition: memory.hpp:1908
config_2d_mem_access & set_surface_pitch(uint32_t SurfacePitch)
Sets surface pitch
Definition: memory.hpp:2030
constexpr int32_t get_height() const
Get height of the block
Definition: memory.hpp:1986
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd< T1, N > lsc_load_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
int32_t get_y() const
Get top left corner Y coordinate of the block
Definition: memory.hpp:1971
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
("use sycl::ext::intel::esimd::memory_kind") lsc_memory_kind __ESIMD_DNS::lsc_data_size lsc_data_size
The scope that lsc_fence operation should apply to Supported platforms: DG2, PVC.
Definition: common.hpp:64
sycl::ext::intel::esimd::cache_hint cache_hint
L1 or L2 cache hint kinds.
Definition: common.hpp:108
atomic_op
Represents an atomic operation.
Definition: common.hpp:160
split_barrier_action
Represents a split barrier action.
Definition: common.hpp:111
__ESIMD_API int32_t get_subdevice_id()
Get subdevice ID.
Definition: memory.hpp:2731
__ESIMD_API int32_t get_hw_thread_id()
Get HW Thread ID.
Definition: memory.hpp:2723
__ESIMD_API SZ simd< T, SZ > src1
Definition: math.hpp:180
__SYCL_DEPRECATED("Please use sycl::ext::intel::esimd::addc(carry, src0, src1);") __ESIMD_API sycl
Definition: math.hpp:500
__ESIMD_API SZ src0
Definition: math.hpp:180
__ESIMD_API void lsc_slm_block_store(uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{})
Transposed SLM scatter with 1 channel.
Definition: memory.hpp:1440
__ESIMD_API void lsc_prefetch(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer prefetch gather.
Definition: memory.hpp:1241
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, sycl::ext::intel::esimd::simd< T, N > > lsc_atomic_update(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
USM pointer atomic.
Definition: memory.hpp:2373
__ESIMD_API void lsc_slm_scatter(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM scatter.
Definition: memory.hpp:1420
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block load.
Definition: memory.hpp:1794
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_slm_atomic_update(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
SLM atomic.
Definition: memory.hpp:2300
__ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, sycl::ext::intel::esimd::simd< T, N > Vals)
2D USM pointer block store.
Definition: memory.hpp:1865
atomic_op
LSC atomic operation codes.
Definition: common.hpp:39
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > lsc_slm_block_load(uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
Transposed SLM gather with 1 channel.
Definition: memory.hpp:612
__ESIMD_API void lsc_fence(sycl::ext::intel::esimd::simd_mask< N > pred=1)
Memory fence.
Definition: memory.hpp:2702
__ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block prefetch.
Definition: memory.hpp:1828
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_slm_gather(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM gather.
Definition: memory.hpp:562
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > lsc_block_load(const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed gather with 1 channel.
Definition: memory.hpp:944
__ESIMD_API void lsc_scatter(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer scatter.
Definition: memory.hpp:1471
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > lsc_block_store(T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed scatter with 1 channel.
Definition: memory.hpp:1605
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_gather(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer gather.
Definition: memory.hpp:669
__ESIMD_API void named_barrier_wait(uint8_t id)
Wait on a named barrier Available only on PVC.
Definition: memory.hpp:369
__ESIMD_API std::enable_if_t<(sizeof(T) *N >=2)> wait(sycl::ext::intel::esimd::simd< T, N > value)
Create explicit scoreboard dependency to avoid device code motion across this call and preserve the v...
Definition: memory.hpp:422
__ESIMD_API void named_barrier_init()
Initialize number of named barriers for a kernel Available only on PVC.
Definition: memory.hpp:377
__ESIMD_API void named_barrier_signal(uint8_t barrier_id, uint8_t producer_consumer_mode, uint32_t num_producers, uint32_t num_consumers)
Perform signal operation for the given named barrier Available only on PVC.
Definition: memory.hpp:393
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > atomic_update(AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); ...
Definition: memory.hpp:5966
__ESIMD_API void split_barrier()
Generic work-group split barrier.
Definition: memory.hpp:29
__ESIMD_API std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > prefetch(AccessorT acc, PropertyListT props={})
template <typename T, int VS = 1, typename AccessorT, typename PropertyListT = empty_properties_t> vo...
Definition: memory.hpp:9405
__ESIMD_API void barrier()
Generic work-group barrier.
Definition: memory.hpp:8016
@ global_coherent_fence
“Commit enable” - wait for fence to complete before continuing.
Definition: memory.hpp:7953
@ local_barrier
Issue SLM memory barrier only. If not set, the memory barrier is global.
Definition: memory.hpp:7971
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_send(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw send.
Definition: memory.hpp:160
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_sends(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, sycl::ext::intel::esimd::simd< T3, n3 > msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw sends.
Definition: memory.hpp:70
typename make_L1_L2_alignment_properties< L1H, L2H, Alignment >::type make_L1_L2_alignment_properties_t
constexpr alignment_key::value_t< K > alignment
cache_hint
L1, L2 or L3 cache hints.
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, N > lsc_format_input(sycl::ext::intel::esimd::simd< T, N > Vals)
Definition: memory.hpp:462
ESIMD_INLINE sycl::ext::intel::esimd::simd< T, N > lsc_format_ret(sycl::ext::intel::esimd::simd< T1, N > Vals)
Definition: memory.hpp:469
Definition: access.hpp:18
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.
Definition: math.hpp:161