DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory.hpp
Go to the documentation of this file.
1 //==-------------- memory.hpp - DPC++ Explicit SIMD API --------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Implement experimental Explicit SIMD memory-access APIs.
9 //===----------------------------------------------------------------------===//
10 
11 #pragma once
12 
17 
18 namespace sycl {
19 inline namespace _V1 {
20 namespace ext::intel {
21 namespace experimental::esimd {
22 
25 
28 template <split_barrier_action flag> __ESIMD_API void split_barrier() {
29  __esimd_sbarrier(flag);
30 }
31 
32 __SYCL_DEPRECATED("use split_barrier<split_barrier_action>()")
33 __ESIMD_API void split_barrier(split_barrier_action flag) {
34  __esimd_sbarrier(flag);
35 }
36 
38 
41 
66 template <typename T1, int n1, typename T2, int n2, typename T3, int n3,
67  int N = 16>
68 __ESIMD_API __ESIMD_NS::simd<T1, n1>
69 raw_sends(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
70  __ESIMD_NS::simd<T3, n3> msgSrc1, uint32_t exDesc, uint32_t msgDesc,
71  uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
72  uint8_t numDst, uint8_t isEOT = 0, uint8_t isSendc = 0,
73  __ESIMD_NS::simd_mask<N> mask = 1) {
74  constexpr unsigned _Width1 = n1 * sizeof(T1);
75  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
76  constexpr unsigned _Width2 = n2 * sizeof(T2);
77  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
78  constexpr unsigned _Width3 = n3 * sizeof(T3);
79  static_assert(_Width3 % 32 == 0, "Invalid size for raw send msgSrc1");
80 
81  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
82  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
83  using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
84 
85  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
86  return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, N>(
87  modifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid, exDesc,
88  msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
89 }
90 
110 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
111  uint8_t numDst, uint8_t isEOT = 0, uint8_t isSendc = 0, typename T1,
112  int n1, typename T2, int n2, typename T3, int n3>
113 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::raw_sends")
114 __ESIMD_API __ESIMD_NS::simd<T1, n1> raw_sends(
115  __ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
116  __ESIMD_NS::simd<T3, n3> msgSrc1, uint32_t exDesc, uint32_t msgDesc,
117  __ESIMD_NS::simd_mask<execSize> mask = 1) {
118  constexpr unsigned _Width1 = n1 * sizeof(T1);
119  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
120  constexpr unsigned _Width2 = n2 * sizeof(T2);
121  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
122  constexpr unsigned _Width3 = n3 * sizeof(T3);
123  static_assert(_Width3 % 32 == 0, "Invalid size for raw send msgSrc1");
124 
125  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
126  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
127  using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
128 
129  constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
130 
131  return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, execSize>(
132  modifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid, exDesc,
133  msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
134 }
135 
157 template <typename T1, int n1, typename T2, int n2, int N = 16>
158 __ESIMD_API __ESIMD_NS::simd<T1, n1>
159 raw_send(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
160  uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
161  uint8_t numSrc0, uint8_t numDst, uint8_t isEOT = 0,
162  uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
163  constexpr unsigned _Width1 = n1 * sizeof(T1);
164  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
165  constexpr unsigned _Width2 = n2 * sizeof(T2);
166  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
167 
168  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
169  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
170 
171  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
172  return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, N>(
173  modifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc, msgDesc,
174  msgSrc0.data(), msgDst.data());
175 }
176 
194 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst,
195  uint8_t isEOT = 0, uint8_t isSendc = 0, typename T1, int n1,
196  typename T2, int n2>
197 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::raw_send")
198 __ESIMD_API __ESIMD_NS::simd<T1, n1> raw_send(
199  __ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
200  uint32_t exDesc, uint32_t msgDesc,
201  __ESIMD_NS::simd_mask<execSize> mask = 1) {
202  constexpr unsigned _Width1 = n1 * sizeof(T1);
203  static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar");
204  constexpr unsigned _Width2 = n2 * sizeof(T2);
205  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
206 
207  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
208  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
209 
210  constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
211  return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, execSize>(
212  modifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc, msgDesc,
213  msgSrc0.data(), msgDst.data());
214 }
215 
236 template <typename T1, int n1, typename T2, int n2, int N = 16>
237 __ESIMD_API void
238 raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0, __ESIMD_NS::simd<T2, n2> msgSrc1,
239  uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid,
240  uint8_t numSrc0, uint8_t numSrc1, uint8_t isEOT = 0,
241  uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
242  constexpr unsigned _Width1 = n1 * sizeof(T1);
243  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
244  constexpr unsigned _Width2 = n2 * sizeof(T2);
245  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc1");
246 
247  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
248  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
249 
250  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
251  __esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, N>(
252  modifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc, msgDesc,
253  msgSrc0.data(), msgSrc1.data());
254 }
255 
272 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1,
273  uint8_t isEOT = 0, uint8_t isSendc = 0, typename T1, int n1,
274  typename T2, int n2>
275 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::raw_sends")
276 __ESIMD_API
277  void raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0,
278  __ESIMD_NS::simd<T2, n2> msgSrc1, uint32_t exDesc,
279  uint32_t msgDesc, __ESIMD_NS::simd_mask<execSize> mask = 1) {
280  constexpr unsigned _Width1 = n1 * sizeof(T1);
281  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
282  constexpr unsigned _Width2 = n2 * sizeof(T2);
283  static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc1");
284 
285  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
286  using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
287 
288  constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
289  __esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, execSize>(
290  modifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc, msgDesc,
291  msgSrc0.data(), msgSrc1.data());
292 }
293 
312 template <typename T1, int n1, int N = 16>
313 __ESIMD_API void
314 raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
315  uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0,
316  uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
317  constexpr unsigned _Width1 = n1 * sizeof(T1);
318  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
319  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
320  uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
321  __esimd_raw_send2_noresult<ElemT1, n1, N>(modifier, execSize, mask.data(),
322  numSrc0, sfid, exDesc, msgDesc,
323  msgSrc0.data());
324 }
325 
341 template <uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0,
342  uint8_t isSendc = 0, typename T1, int n1>
343 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::raw_send")
344 __ESIMD_API
345  void raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc,
346  uint32_t msgDesc, __ESIMD_NS::simd_mask<execSize> mask = 1) {
347  constexpr unsigned _Width1 = n1 * sizeof(T1);
348  static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
349  using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
350  constexpr uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
351  __esimd_raw_send2_noresult<ElemT1, n1, execSize>(
352  modifier, execSize, mask.data(), numSrc0, sfid, exDesc, msgDesc,
353  msgSrc0.data());
354 }
355 
357 
360 
363 
368 __ESIMD_API void named_barrier_wait(uint8_t id) {
369  __esimd_nbarrier(0 /*wait*/, id, 0 /*thread count*/);
370 }
371 
376 template <uint8_t NbarCount> __ESIMD_API void named_barrier_init() {
377  __esimd_nbarrier_init(NbarCount);
378 }
379 
392 __ESIMD_API void named_barrier_signal(uint8_t barrier_id,
393  uint8_t producer_consumer_mode,
394  uint32_t num_producers,
395  uint32_t num_consumers) {
398 #ifdef __ESIMD_USE_NEW_NAMED_BARRIER_INTRIN
399  __esimd_nbarrier_arrive(barrier_id, producer_consumer_mode, num_producers,
400  num_consumers);
401 #else
402  constexpr uint32_t gateway = 3;
403  constexpr uint32_t barrier = 4;
404  constexpr uint32_t descriptor = 1 << 25 | // Message length: 1 register
405  0 << 12 | // Fence Data Ports: No fence
406  barrier; // Barrier subfunction
407 
408  __ESIMD_DNS::vector_type_t<uint32_t, 8> payload = 0;
409  payload[2] = (num_consumers & 0xff) << 24 | (num_producers & 0xff) << 16 |
410  producer_consumer_mode << 14 | (barrier_id & 0b11111) << 0;
411  __esimd_raw_send_nbarrier_signal<uint32_t, 8>(
412  0 /*sendc*/, gateway, descriptor, payload, 1 /*pred*/);
413 #endif
414 }
415 
419 template <typename T, int N>
420 __ESIMD_API std::enable_if_t<(sizeof(T) * N >= 2)>
421 wait(__ESIMD_NS::simd<T, N> value) {
422 #ifdef __SYCL_DEVICE_ONLY__
423  uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
424  __esimd_wait(Word);
425 #endif // __SYCL_DEVICE_ONLY__
426 }
427 
431 template <typename T, typename RegionT>
432 __ESIMD_API std::enable_if_t<
433  (RegionT::length * sizeof(typename RegionT::element_type) >= 2)>
434 wait(__ESIMD_NS::simd_view<T, RegionT> value) {
435 #ifdef __SYCL_DEVICE_ONLY__
436  uint16_t Word = value.template bit_cast_view<uint16_t>()[0];
437  __esimd_wait(Word);
438 #endif // __SYCL_DEVICE_ONLY__
439 }
440 
442 
445 
448 
449 namespace detail {
450 // Compute the data size for 2d block load or store.
451 template <typename T, int NBlocks, int Height, int Width, bool Transposed,
452  bool Transformed>
453 constexpr int get_lsc_block_2d_data_size() {
454  return __ESIMD_DNS::get_lsc_block_2d_data_size<T, NBlocks, Height, Width,
455  Transposed, Transformed>();
456 }
457 
458 // Format u8 and u16 to u8u32 and u16u32 by doing garbage-extension.
459 template <typename RT, typename T, int N>
460 ESIMD_INLINE __ESIMD_NS::simd<RT, N>
461 lsc_format_input(__ESIMD_NS::simd<T, N> Vals) {
462  return __ESIMD_DNS::lsc_format_input<RT, T, N>(Vals);
463 }
464 
465 // Format u8u32 and u16u32 back to u8 and u16.
466 template <typename T, typename T1, int N>
467 ESIMD_INLINE __ESIMD_NS::simd<T, N>
468 lsc_format_ret(__ESIMD_NS::simd<T1, N> Vals) {
469  return __ESIMD_DNS::lsc_format_ret<T, T1, N>(Vals);
470 }
471 
472 template <typename T> constexpr uint32_t get_lsc_data_size() {
473  switch (sizeof(T)) {
474  case 1:
475  return 0;
476  case 2:
477  return 1;
478  case 4:
479  return 2;
480  case 8:
481  return 3;
482  default:
483  static_assert(true, "Unsupported data type.");
484  }
485 }
486 
487 template <cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
488 constexpr uint32_t get_lsc_load_cache_mask() {
489  if constexpr (L1H == cache_hint::read_invalidate &&
490  L2H == cache_hint::cached) {
491  return 7;
492  }
493  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
494  return 6;
495  }
496  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
497  return 5;
498  }
499  if constexpr (L1H == cache_hint::cached && L2H == cache_hint::cached) {
500  return 4;
501  }
502  if constexpr (L1H == cache_hint::cached && L2H == cache_hint::uncached) {
503  return 3;
504  }
505  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
506  return 2;
507  }
508  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
509  return 1;
510  }
511  return 0;
512 }
513 
514 template <cache_hint L1H = cache_hint::none, cache_hint L2H = cache_hint::none>
515 constexpr uint32_t get_lsc_store_cache_mask() {
516  if constexpr (L1H == cache_hint::write_back && L2H == cache_hint::cached) {
517  return 7;
518  }
519  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::cached) {
520  return 6;
521  }
522  if constexpr (L1H == cache_hint::streaming && L2H == cache_hint::uncached) {
523  return 5;
524  }
525  if constexpr (L1H == cache_hint::write_through && L2H == cache_hint::cached) {
526  return 4;
527  }
528  if constexpr (L1H == cache_hint::write_through &&
529  L2H == cache_hint::uncached) {
530  return 3;
531  }
532  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::cached) {
533  return 2;
534  }
535  if constexpr (L1H == cache_hint::uncached && L2H == cache_hint::uncached) {
536  return 1;
537  }
538  return 0;
539 }
540 
541 } // namespace detail
542 
558 template <typename T, int NElts = 1,
559  lsc_data_size DS = lsc_data_size::default_size, int N>
560 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
561 lsc_slm_gather(__ESIMD_NS::simd<uint32_t, N> offsets,
562  __ESIMD_NS::simd_mask<N> pred = 1) {
563  __ESIMD_NS::simd<T, N * NElts> pass_thru;
564  return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
565 }
566 
584 template <typename T, int NElts = 1,
585  lsc_data_size DS = lsc_data_size::default_size, int N>
586 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
587 lsc_slm_gather(__ESIMD_NS::simd<uint32_t, N> offsets,
588  __ESIMD_NS::simd_mask<N> pred,
589  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
590  return __ESIMD_DNS::slm_gather_impl<T, NElts, DS>(offsets, pred, pass_thru);
591 }
592 
608 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
609  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
610 __ESIMD_API __ESIMD_NS::simd<T, NElts>
611 lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred = 1,
612  FlagsT flags = FlagsT{}) {
613  __ESIMD_NS::properties Props{__ESIMD_NS::alignment<
614  FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
615  return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, Props);
616 }
617 
635 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
636  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
637 __ESIMD_API __ESIMD_NS::simd<T, NElts>
638 lsc_slm_block_load(uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
639  __ESIMD_NS::simd<T, NElts> pass_thru) {
640  __ESIMD_NS::properties Props{__ESIMD_NS::alignment<
641  FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
642  return __ESIMD_NS::slm_block_load<T, NElts>(offset, pred, pass_thru, Props);
643 }
644 
663 template <typename T, int NElts = 1,
664  lsc_data_size DS = lsc_data_size::default_size,
666  int N, typename Toffset>
667 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
668 lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
669  __ESIMD_NS::simd_mask<N> pred = 1) {
670  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
671  __ESIMD_NS::simd<T, N * NElts> PassThru; // Intentionally undefined.
672  return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
673  PassThru);
674 }
675 
696 template <typename T, int NElts = 1,
697  lsc_data_size DS = lsc_data_size::default_size,
699  int N, typename Toffset>
700 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
701 lsc_gather(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
702  __ESIMD_NS::simd_mask<N> pred,
703  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
704  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
705  return __ESIMD_DNS::gather_impl<T, NElts, DS, PropertyListT>(p, offsets, pred,
706  pass_thru);
707 }
708 
709 template <typename T, int NElts = 1,
710  lsc_data_size DS = lsc_data_size::default_size,
712  int N, typename OffsetObjT, typename RegionTy>
713 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
714 lsc_gather(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
715  __ESIMD_NS::simd_mask<N> pred = 1) {
716  return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
717 }
718 
719 template <typename T, int NElts = 1,
720  lsc_data_size DS = lsc_data_size::default_size,
722  int N, typename OffsetObjT, typename RegionTy>
723 __ESIMD_API __ESIMD_NS::simd<T, N * NElts>
724 lsc_gather(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
725  __ESIMD_NS::simd_mask<N> pred,
726  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
727  return lsc_gather<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred,
728  pass_thru);
729 }
730 
731 template <typename T, int NElts = 1,
732  lsc_data_size DS = lsc_data_size::default_size,
734  int N, typename Toffset>
735 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
736  __ESIMD_NS::simd<T, N * NElts>>
737 lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
738  return lsc_gather<T, NElts, DS, L1H, L2H, N>(
739  p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
740 }
741 
742 template <typename T, int NElts = 1,
743  lsc_data_size DS = lsc_data_size::default_size,
745  int N, typename Toffset>
746 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>,
747  __ESIMD_NS::simd<T, N * NElts>>
748 lsc_gather(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred,
749  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
750  return lsc_gather<T, NElts, DS, L1H, L2H, N>(
751  p, __ESIMD_NS::simd<Toffset, N>(offset), pred, pass_thru);
752 }
753 
773 template <typename T, int NElts = 1,
774  lsc_data_size DS = lsc_data_size::default_size,
776  int N, typename AccessorTy>
777 __ESIMD_API
778  std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
779  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
780  __ESIMD_NS::simd<T, N * NElts>>
781  lsc_gather(AccessorTy acc,
782  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
783  __ESIMD_NS::simd_mask<N> pred = 1) {
784 #ifdef __ESIMD_FORCE_STATELESS_MEM
785  return lsc_gather<T, NElts, DS, L1H, L2H>(
786  reinterpret_cast<T *>(acc.get_pointer().get()), offsets, pred);
787 #else
788  __ESIMD_NS::simd<T, N * NElts> PassThru; // Intentionally uninitialized.
789  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
790  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
791  acc, offsets, pred, PassThru);
792 #endif // __ESIMD_FORCE_STATELESS_MEM
793 }
794 
795 #ifdef __ESIMD_FORCE_STATELESS_MEM
796 template <typename T, int NElts = 1,
797  lsc_data_size DS = lsc_data_size::default_size,
799  int N, typename AccessorTy, typename Toffset>
800 __ESIMD_API std::enable_if_t<
801  __ESIMD_DNS::is_device_accessor_with_v<
802  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
803  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
804  __ESIMD_NS::simd<T, N * NElts>>
805 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
806  __ESIMD_NS::simd_mask<N> pred = 1) {
807  return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
808  acc, convert<uint64_t>(offsets), pred);
809 }
810 #endif
811 
812 template <typename T, int NElts = 1,
813  lsc_data_size DS = lsc_data_size::default_size,
815  int N, typename AccessorTy>
816 __ESIMD_API
817  std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
818  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
819  __ESIMD_NS::simd<T, N * NElts>>
820  lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
821  __ESIMD_NS::simd_mask<N> pred = 1) {
822  return lsc_slm_gather<T, NElts, DS>(
823  offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
824 }
825 
847 template <typename T, int NElts = 1,
848  lsc_data_size DS = lsc_data_size::default_size,
850  int N, typename AccessorTy>
851 __ESIMD_API
852  std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
853  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>,
854  __ESIMD_NS::simd<T, N * NElts>>
855  lsc_gather(AccessorTy acc,
856  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
857  __ESIMD_NS::simd_mask<N> pred,
858  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
859 #ifdef __ESIMD_FORCE_STATELESS_MEM
860  return lsc_gather<T, NElts, DS, L1H, L2H>(
861  reinterpret_cast<T *>(acc.get_pointer().get()), offsets, pred, pass_thru);
862 
863 #else
864  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
865  return __ESIMD_DNS::gather_impl<T, N * NElts, NElts, PropertyListT, DS>(
866  acc, offsets, pred, pass_thru);
867 #endif // __ESIMD_FORCE_STATELESS_MEM
868 }
869 
870 #ifdef __ESIMD_FORCE_STATELESS_MEM
871 template <typename T, int NElts = 1,
872  lsc_data_size DS = lsc_data_size::default_size,
874  int N, typename AccessorTy, typename Toffset>
875 __ESIMD_API std::enable_if_t<
876  __ESIMD_DNS::is_device_accessor_with_v<
877  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
878  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>,
879  __ESIMD_NS::simd<T, N * NElts>>
880 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
881  __ESIMD_NS::simd_mask<N> pred,
882  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
883  return lsc_gather<T, NElts, DS, L1H, L2H, N, AccessorTy>(
884  acc, convert<uint64_t>(offsets), pred, pass_thru);
885 }
886 #endif
887 
888 template <typename T, int NElts = 1,
889  lsc_data_size DS = lsc_data_size::default_size,
891  int N, typename AccessorTy>
892 __ESIMD_API std::enable_if_t<
893  sycl::detail::acc_properties::is_local_accessor_v<AccessorTy>,
894  __ESIMD_NS::simd<T, N * NElts>>
895 lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
896  __ESIMD_NS::simd_mask<N> pred,
897  __ESIMD_NS::simd<T, N * NElts> pass_thru) {
898  return lsc_slm_gather<T, NElts, DS>(
899  offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru);
900 }
901 
937 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
939  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
940 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
941  __ESIMD_NS::simd<T, NElts>>
942 lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
944  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
945  __ESIMD_NS::simd<T, NElts> PassThru; // Intentionally undefined.
946  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
947  PassThru);
948 }
949 
978 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
980  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
981 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
982  __ESIMD_NS::simd<T, NElts>>
983 lsc_block_load(const T *p, FlagsT) {
985  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
986  __ESIMD_NS::simd<T, NElts> PassThru; // Intentionally undefined.
987  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(
988  p, __ESIMD_NS::simd_mask<1>(1), PassThru);
989 }
990 
1022 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1024  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1025 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1026  __ESIMD_NS::simd<T, NElts>>
1027 lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred,
1028  __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
1029  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1030  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1031  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(p, pred,
1032  pass_thru);
1033 }
1034 
1066 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1068  typename AccessorTy,
1069  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1070 __ESIMD_API std::enable_if_t<
1071  __ESIMD_DNS::is_device_accessor_with_v<
1072  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1073  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1074  __ESIMD_NS::simd<T, NElts>>
1076  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
1077  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1078  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1079  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
1080  pred);
1081 }
1082 
1083 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1085  typename AccessorTy,
1086  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1087 __ESIMD_API std::enable_if_t<
1088  __ESIMD_DNS::is_local_accessor_with_v<
1089  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1090  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1091  __ESIMD_NS::simd<T, NElts>>
1092 lsc_block_load(AccessorTy acc, uint32_t offset,
1093  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT flags = FlagsT{}) {
1094  return lsc_slm_block_load<T, NElts, DS>(
1095  offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, flags);
1096 }
1097 
1125 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1127  typename AccessorTy,
1128  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1129 __ESIMD_API std::enable_if_t<
1130  __ESIMD_DNS::is_device_accessor_with_v<
1131  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1132  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1133  __ESIMD_NS::simd<T, NElts>>
1135  FlagsT flags) {
1136  return lsc_block_load<T, NElts, DS, L1H, L2H>(
1137  acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
1138 }
1139 
1140 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1142  typename AccessorTy,
1143  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1144 __ESIMD_API std::enable_if_t<
1145  __ESIMD_DNS::is_local_accessor_with_v<
1146  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1147  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1148  __ESIMD_NS::simd<T, NElts>>
1149 lsc_block_load(AccessorTy acc, uint32_t offset, FlagsT flags) {
1150  return lsc_block_load<T, NElts, DS, L1H, L2H>(
1151  acc, offset, __ESIMD_NS::simd_mask<1>(1), flags);
1152 }
1153 
1186 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1188  typename AccessorTy,
1189  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1190 __ESIMD_API std::enable_if_t<
1191  __ESIMD_DNS::is_device_accessor_with_v<
1192  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1193  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1194  __ESIMD_NS::simd<T, NElts>>
1196  __ESIMD_NS::simd_mask<1> pred,
1197  __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT = {}) {
1198  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1199  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1200  return __ESIMD_DNS::block_load_impl<T, NElts, PropertyListT>(acc, offset,
1201  pred, pass_thru);
1202 }
1203 
1204 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1206  typename AccessorTy,
1207  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1208 __ESIMD_API std::enable_if_t<
1209  __ESIMD_DNS::is_local_accessor_with_v<
1210  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1211  __ESIMD_NS::is_simd_flag_type_v<FlagsT>,
1212  __ESIMD_NS::simd<T, NElts>>
1213 lsc_block_load(AccessorTy acc, uint32_t offset, __ESIMD_NS::simd_mask<1> pred,
1214  __ESIMD_NS::simd<T, NElts> pass_thru, FlagsT flags = FlagsT{}) {
1215  return lsc_slm_block_load<T, NElts, DS>(
1216  offset + __ESIMD_DNS::localAccessorToOffset(acc), pred, pass_thru, flags);
1217 }
1218 
1235 template <typename T, int NElts = 1,
1236  lsc_data_size DS = lsc_data_size::default_size,
1238  int N, typename Toffset>
1239 __ESIMD_API void lsc_prefetch(const T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1240  __ESIMD_NS::simd_mask<N> pred = 1) {
1241  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1242  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, offsets, pred);
1243 }
1244 
1245 template <typename T, int NElts = 1,
1246  lsc_data_size DS = lsc_data_size::default_size,
1248  int N, typename OffsetObjT, typename RegionTy>
1249 __ESIMD_API void
1250 lsc_prefetch(const T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1251  __ESIMD_NS::simd_mask<N> pred = 1) {
1252  lsc_prefetch<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), pred);
1253 }
1254 
1255 template <typename T, int NElts = 1,
1256  lsc_data_size DS = lsc_data_size::default_size,
1258  int N, typename Toffset>
1259 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset>>
1260 lsc_prefetch(const T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
1261  lsc_prefetch<T, NElts, DS, L1H, L2H, N>(
1262  p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
1263 }
1264 
1278 template <typename T, int NElts = 1,
1279  lsc_data_size DS = lsc_data_size::default_size,
1281 __ESIMD_API void lsc_prefetch(const T *p) {
1282  __ESIMD_NS::simd_mask<1> Mask = 1;
1283  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1284  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(p, 0, Mask);
1285 }
1286 
1304 template <typename T, int NElts = 1,
1305  lsc_data_size DS = lsc_data_size::default_size,
1307  int N, typename AccessorTy>
1308 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1309  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
1310 lsc_prefetch(AccessorTy acc,
1311  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1312  __ESIMD_NS::simd_mask<N> pred = 1) {
1313 #ifdef __ESIMD_FORCE_STATELESS_MEM
1314  lsc_prefetch<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1315  offsets, pred);
1316 #else
1317  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1318  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offsets, pred);
1319 #endif
1320 }
1321 
1322 #ifdef __ESIMD_FORCE_STATELESS_MEM
1323 template <typename T, int NElts = 1,
1324  lsc_data_size DS = lsc_data_size::default_size,
1326  int N, typename AccessorTy, typename Toffset>
1327 __ESIMD_API std::enable_if_t<
1328  __ESIMD_DNS::is_device_accessor_with_v<
1329  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
1330  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1331 lsc_prefetch(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1332  __ESIMD_NS::simd_mask<N> pred = 1) {
1333  lsc_prefetch<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1334  acc, convert<uint64_t>(offsets), pred);
1335 }
1336 #endif
1337 
1353 template <typename T, int NElts = 1,
1354  lsc_data_size DS = lsc_data_size::default_size,
1356  typename AccessorTy>
1357 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1358  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read>>
1360 #ifdef __ESIMD_FORCE_STATELESS_MEM
1361  lsc_prefetch<T, NElts, DS, L1H, L2H>(
1362  __ESIMD_DNS::accessorToPointer<T>(acc, offset));
1363 #else
1364  __ESIMD_NS::simd_mask<1> Mask = 1;
1365  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1366  __ESIMD_DNS::prefetch_impl<T, NElts, DS, PropertyListT>(acc, offset, Mask);
1367 #endif
1368 }
1369 
1384 template <typename T, int NElts = 1,
1385  lsc_data_size DS = lsc_data_size::default_size, int N>
1386 __ESIMD_API void lsc_slm_scatter(__ESIMD_NS::simd<uint32_t, N> offsets,
1387  __ESIMD_NS::simd<T, N * NElts> vals,
1388  __ESIMD_NS::simd_mask<N> pred = 1) {
1389  __ESIMD_DNS::slm_scatter_impl<T, NElts, DS>(offsets, vals, pred);
1390 }
1391 
1404 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1405  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1406 __ESIMD_API void lsc_slm_block_store(uint32_t offset,
1407  __ESIMD_NS::simd<T, NElts> vals,
1408  FlagsT flags = FlagsT{}) {
1409  // Make sure we generate an LSC block store
1410  __ESIMD_NS::properties Props{__ESIMD_NS::alignment<
1411  FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>};
1412  __ESIMD_NS::simd_mask<1> pred = 1;
1413  __ESIMD_NS::slm_block_store<T, NElts>(offset, vals, pred, Props);
1414 }
1415 
1433 template <typename T, int NElts = 1,
1434  lsc_data_size DS = lsc_data_size::default_size,
1436  int N, typename Toffset>
1437 __ESIMD_API void lsc_scatter(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
1438  __ESIMD_NS::simd<T, N * NElts> vals,
1439  __ESIMD_NS::simd_mask<N> pred = 1) {
1440  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1441  __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT, N, Toffset>(
1442  p, offsets, vals, pred);
1443 }
1444 
1445 template <typename T, int NElts = 1,
1446  lsc_data_size DS = lsc_data_size::default_size,
1448  int N, typename OffsetObjT, typename RegionTy>
1449 __ESIMD_API void
1450 lsc_scatter(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
1451  __ESIMD_NS::simd<T, N * NElts> vals,
1452  __ESIMD_NS::simd_mask<N> pred = 1) {
1453  lsc_scatter<T, NElts, DS, L1H, L2H, N>(p, offsets.read(), vals, pred);
1454 }
1455 
1456 template <typename T, int NElts = 1,
1457  lsc_data_size DS = lsc_data_size::default_size,
1459  int N, typename Toffset>
1460 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> && N == 1>
1461 lsc_scatter(T *p, Toffset offset, __ESIMD_NS::simd<T, N * NElts> vals,
1462  __ESIMD_NS::simd_mask<N> pred = 1) {
1463  lsc_scatter<T, NElts, DS, L1H, L2H, N>(
1464  p, __ESIMD_NS::simd<Toffset, N>(offset), vals, pred);
1465 }
1466 
1485 template <typename T, int NElts = 1,
1486  lsc_data_size DS = lsc_data_size::default_size,
1488  int N, typename AccessorTy>
1489 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_device_accessor_with_v<
1490  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1491 lsc_scatter(AccessorTy acc,
1492  __ESIMD_NS::simd<__ESIMD_DNS::DeviceAccessorOffsetT, N> offsets,
1493  __ESIMD_NS::simd<T, N * NElts> vals,
1494  __ESIMD_NS::simd_mask<N> pred = 1) {
1495 #ifdef __ESIMD_FORCE_STATELESS_MEM
1496  lsc_scatter<T, NElts, DS, L1H, L2H>(__ESIMD_DNS::accessorToPointer<T>(acc),
1497  offsets, vals, pred);
1498 #else
1499  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1500  __ESIMD_DNS::scatter_impl<T, NElts, DS, PropertyListT>(acc, offsets, vals,
1501  pred);
1502 #endif
1503 }
1504 
1505 #ifdef __ESIMD_FORCE_STATELESS_MEM
1506 template <typename T, int NElts = 1,
1507  lsc_data_size DS = lsc_data_size::default_size,
1509  int N, typename AccessorTy, typename Toffset>
1510 __ESIMD_API std::enable_if_t<
1511  __ESIMD_DNS::is_device_accessor_with_v<
1512  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1513  std::is_integral_v<Toffset> && !std::is_same_v<Toffset, uint64_t>>
1514 lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
1515  __ESIMD_NS::simd<T, N * NElts> vals,
1516  __ESIMD_NS::simd_mask<N> pred = 1) {
1517  lsc_scatter<T, NElts, DS, L1H, L2H, N, AccessorTy>(
1518  acc, convert<uint64_t>(offsets), vals, pred);
1519 }
1520 #endif
1521 
1522 template <typename T, int NElts = 1,
1523  lsc_data_size DS = lsc_data_size::default_size,
1525  int N, typename AccessorTy>
1526 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_local_accessor_with_v<
1527  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>>
1528 lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
1529  __ESIMD_NS::simd<T, N * NElts> vals,
1530  __ESIMD_NS::simd_mask<N> pred = 1) {
1531  lsc_slm_scatter<T, NElts, DS>(
1532  offsets + __ESIMD_DNS::localAccessorToOffset(acc), vals, pred);
1533 }
1534 
1567 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1569  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1570 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1571 lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals,
1572  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1573  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1574  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1575  return __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(p, vals, pred);
1576 }
1577 
1606 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1608  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1609 __ESIMD_API std::enable_if_t<__ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1610 lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
1611  lsc_block_store<T, NElts, DS, L1H, L2H>(p, vals, __ESIMD_NS::simd_mask<1>(1),
1612  flags);
1613 }
1614 
1649 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1651  typename AccessorTy,
1652  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1653 __ESIMD_API std::enable_if_t<
1654  __ESIMD_DNS::is_device_accessor_with_v<
1655  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1656  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1658  __ESIMD_NS::simd<T, NElts> vals,
1659  __ESIMD_NS::simd_mask<1> pred = 1, FlagsT = {}) {
1660  using PropertyListT = __ESIMD_DNS::make_L1_L2_alignment_properties_t<
1661  L1H, L2H, FlagsT::template alignment<__ESIMD_NS::simd<T, NElts>>>;
1662  __ESIMD_DNS::block_store_impl<T, NElts, PropertyListT>(acc, offset, vals,
1663  pred);
1664 }
1665 
1666 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1668  typename AccessorTy,
1669  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1670 __ESIMD_API std::enable_if_t<
1671  __ESIMD_DNS::is_local_accessor_with_v<
1672  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1673  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1674 lsc_block_store(AccessorTy acc, uint32_t offset,
1675  __ESIMD_NS::simd<T, NElts> vals, FlagsT flags = FlagsT{}) {
1676  lsc_slm_block_store<T, NElts, DS>(
1677  offset + __ESIMD_DNS::localAccessorToOffset(acc), vals, flags);
1678 }
1679 
1710 template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
1712  typename AccessorTy,
1713  typename FlagsT = __ESIMD_DNS::dqword_element_aligned_tag>
1714 __ESIMD_API std::enable_if_t<
1715  __ESIMD_DNS::is_accessor_with_v<
1716  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write> &&
1717  __ESIMD_NS::is_simd_flag_type_v<FlagsT>>
1719  __ESIMD_NS::simd<T, NElts> vals, FlagsT flags) {
1720  lsc_block_store<T, NElts, DS, L1H, L2H>(acc, offset, vals,
1721  __ESIMD_NS::simd_mask<1>(1), flags);
1722 }
1723 
1754 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1755  bool Transposed = false, bool Transformed = false,
1758  T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1759 __ESIMD_API __ESIMD_NS::simd<T, N>
1760 lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight,
1761  unsigned SurfacePitch, int X, int Y) {
1762  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1763  return __ESIMD_DNS::load_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1764  Transposed, Transformed, PropertyListT>(
1765  Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
1766 }
1767 
1790 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1793  T, NBlocks, BlockHeight, BlockWidth, false, false>()>
1794 __ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth,
1795  unsigned SurfaceHeight, unsigned SurfacePitch,
1796  int X, int Y) {
1797  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1798  __ESIMD_DNS::prefetch_2d_impl<T, BlockWidth, BlockHeight, NBlocks,
1799  PropertyListT>(Ptr, SurfaceWidth, SurfaceHeight,
1800  SurfacePitch, X, Y);
1801 }
1802 
1827 template <typename T, int BlockWidth, int BlockHeight = 1,
1830  T, 1u, BlockHeight, BlockWidth, false, false>()>
1831 __ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth,
1832  unsigned SurfaceHeight, unsigned SurfacePitch,
1833  int X, int Y, __ESIMD_NS::simd<T, N> Vals) {
1834  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1835  __ESIMD_DNS::store_2d_impl<T, BlockWidth, BlockHeight, PropertyListT>(
1836  Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y, Vals);
1837 }
1838 
1846 template <typename T, int BlockWidth, int BlockHeight, int NBlocks>
1848 public:
1852  config_2d_mem_access() : payload_data(0) {
1853  payload_data.template select<1, 1>(7) =
1854  ((NBlocks - 1) << 16) | ((BlockHeight - 1) << 8) | (BlockWidth - 1);
1855  }
1856 
1861  : payload_data(other.payload) {}
1862 
1874  config_2d_mem_access(const T *Ptr, uint32_t SurfaceWidth,
1875  uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X,
1876  int32_t Y)
1877  : config_2d_mem_access() {
1878  payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
1879  (uint64_t)Ptr;
1880  payload_data.template select<1, 1>(2) = SurfaceWidth;
1881  payload_data.template select<1, 1>(3) = SurfaceHeight;
1882  payload_data.template select<1, 1>(4) = SurfacePitch;
1883  payload_data.template select<1, 1>(5) = X;
1884  payload_data.template select<1, 1>(6) = Y;
1885  }
1886 
1891  T *get_data_pointer() const {
1892  return (T *)((
1893  uint64_t)(const_cast<config_2d_mem_access *>(this)
1894  ->payload_data.template bit_cast_view<uint64_t>()[0]));
1895  }
1896 
1901  uint32_t get_surface_width() const {
1902  return const_cast<config_2d_mem_access *>(this)
1903  ->payload_data.template select<1, 1>(2);
1904  }
1905 
1910  uint32_t get_surface_height() const {
1911  return const_cast<config_2d_mem_access *>(this)
1912  ->payload_data.template select<1, 1>(3);
1913  }
1914 
1919  uint32_t get_surface_pitch() const {
1920  return const_cast<config_2d_mem_access *>(this)
1921  ->payload_data.template select<1, 1>(4);
1922  }
1923 
1928  int32_t get_x() const {
1929  return const_cast<config_2d_mem_access *>(this)
1930  ->payload_data.template select<1, 1>(5);
1931  }
1932 
1937  int32_t get_y() const {
1938  return const_cast<config_2d_mem_access *>(this)
1939  ->payload_data.template select<1, 1>(6);
1940  }
1941 
1946  constexpr int32_t get_width() const { return BlockWidth; }
1947 
1952  constexpr int32_t get_height() const { return BlockHeight; }
1953 
1958  constexpr int32_t get_number_of_blocks() const { return NBlocks; }
1959 
1966  payload_data.template bit_cast_view<uint64_t>().template select<1, 1>(0) =
1967  (uint64_t)Ptr;
1968  return *this;
1969  }
1970 
1976  config_2d_mem_access &set_surface_width(uint32_t SurfaceWidth) {
1977  payload_data.template select<1, 1>(2) = SurfaceWidth;
1978  return *this;
1979  }
1980 
1986  config_2d_mem_access &set_surface_height(uint32_t SurfaceHeight) {
1987  payload_data.template select<1, 1>(3) = SurfaceHeight;
1988  return *this;
1989  }
1990 
1996  config_2d_mem_access &set_surface_pitch(uint32_t SurfacePitch) {
1997  payload_data.template select<1, 1>(4) = SurfacePitch;
1998  return *this;
1999  }
2000 
2007  payload_data.template select<1, 1>(5) = X;
2008  return *this;
2009  }
2010 
2017  payload_data.template select<1, 1>(6) = Y;
2018  return *this;
2019  }
2020 
2021 private:
2022  __ESIMD_NS::simd<uint32_t, 16> get_raw_data() { return payload_data; }
2023  __ESIMD_NS::simd<uint32_t, 16> payload_data;
2024 
2025  template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
2026  bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L2H,
2027  int N>
2028  friend ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T1, N> lsc_load_2d(
2030 
2031  template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
2032  cache_hint L1H, cache_hint L2H, int N>
2033  friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_store_2d(
2035  __ESIMD_NS::simd<T1, N> Data);
2036 
2037  template <typename T1, int BlockWidth1, int BlockHeight1, int NBlocks1,
2038  bool Transposed1, bool Transformed1, cache_hint L1H, cache_hint L2H,
2039  int N>
2040  friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
2042 };
2043 
2063 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
2064  bool Transposed = false, bool Transformed = false,
2067  T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
2068 ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
2070  __ESIMD_DNS::check_lsc_block_2d_restrictions<
2071  T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
2072  __ESIMD_DNS::block_2d_op::load>();
2073  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2074  __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
2075  PropertyListT>();
2076  constexpr int ElemsPerDword = 4 / sizeof(T);
2077  constexpr int GRFRowSize = Transposed ? BlockHeight
2078  : Transformed ? BlockWidth * ElemsPerDword
2079  : BlockWidth;
2080  constexpr int GRFRowPitch = __ESIMD_DNS::getNextPowerOf2<GRFRowSize>();
2081  constexpr int GRFColSize =
2082  Transposed
2083  ? BlockWidth
2084  : (Transformed ? (BlockHeight + ElemsPerDword - 1) / ElemsPerDword
2085  : BlockHeight);
2086  constexpr int GRFBlockSize = GRFRowPitch * GRFColSize;
2087  constexpr int GRFBlockPitch =
2088  __ESIMD_DNS::roundUpNextMultiple<64 / sizeof(T), GRFBlockSize>();
2089  constexpr int ActualN = NBlocks * GRFBlockPitch;
2090 
2091  constexpr int DstBlockElements = GRFColSize * GRFRowSize;
2092  constexpr int DstElements = DstBlockElements * NBlocks;
2093 
2094  constexpr uint32_t GrfBytes = 64;
2095  constexpr uint32_t DstBlockSize =
2096  __ESIMD_DNS::roundUpNextMultiple<DstElements * sizeof(T), GrfBytes>();
2097  constexpr uint32_t DstLength =
2098  (DstBlockSize / GrfBytes) > 31 ? 31 : (DstBlockSize / GrfBytes);
2099  constexpr uint32_t DstLengthMask = DstLength << 20;
2100 
2101  static_assert(N == ActualN || N == DstElements, "Incorrect element count");
2102 
2103  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
2104  << 17;
2105  constexpr uint32_t base_desc = 0x2000003;
2106  constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
2107  constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
2108  constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2109  __ESIMD_NS::simd<T, N> oldDst;
2110  constexpr uint32_t exDesc = 0x0;
2111  constexpr uint32_t desc = base_desc | cache_mask | transformMask |
2112  transposeMask | dataSizeMask | DstLengthMask;
2113  constexpr uint8_t execSize = 1;
2114  constexpr uint8_t sfid = 0xF;
2115  constexpr uint8_t numSrc0 = 0x1;
2116  constexpr uint8_t numDst = (N * sizeof(T)) / 64;
2117  __ESIMD_NS::simd<T, ActualN> Raw =
2118  __ESIMD_NS::raw_send<execSize, sfid, numSrc0, numDst>(
2119  oldDst, payload.get_raw_data(), exDesc, desc);
2120 
2121  if constexpr (ActualN == N) {
2122  return Raw;
2123  } else {
2124  // HW restrictions force data which is read to contain padding filled with
2125  // zeros for 2d lsc loads. This code eliminates such padding.
2126 
2127  __ESIMD_NS::simd<T, DstElements> Dst;
2128 
2129  for (auto i = 0; i < NBlocks; i++) {
2130  auto DstBlock =
2131  Dst.template select<DstBlockElements, 1>(i * DstBlockElements);
2132 
2133  auto RawBlock = Raw.template select<GRFBlockSize, 1>(i * GRFBlockPitch);
2134  DstBlock = RawBlock.template bit_cast_view<T, GRFColSize, GRFRowPitch>()
2135  .template select<GRFColSize, 1, GRFRowSize, 1>(0, 0)
2136  .template bit_cast_view<T>();
2137  }
2138 
2139  return Dst;
2140  }
2141 }
2142 
2159 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
2160  bool Transposed = false, bool Transformed = false,
2163  T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
2164 ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(
2166  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2167  __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
2168  PropertyListT>();
2169  __ESIMD_DNS::check_lsc_block_2d_restrictions<
2170  T, BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
2172  static_assert(!Transposed || !Transformed,
2173  "Transposed and transformed is not supported");
2174  constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
2175  << 17;
2176  constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2177  constexpr uint32_t base_desc = 0x2000003;
2178  constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0;
2179  constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0;
2180  constexpr uint32_t exDesc = 0x0;
2181  constexpr uint32_t desc =
2182  base_desc | cache_mask | transformMask | transposeMask | dataSizeMask;
2183  constexpr uint8_t execSize = 1;
2184  constexpr uint8_t sfid = 0xF;
2185  constexpr uint8_t numDst = (N * sizeof(T)) / 64;
2186  __ESIMD_NS::raw_send<execSize, sfid, numDst>(payload.get_raw_data(), exDesc,
2187  desc);
2188 }
2189 
2205 template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
2208  T, NBlocks, BlockHeight, BlockWidth, false, false>()>
2209 ESIMD_INLINE SYCL_ESIMD_FUNCTION void
2211  __ESIMD_NS::simd<T, N> Data) {
2212  __ESIMD_DNS::check_lsc_block_2d_restrictions<
2213  T, BlockWidth, BlockHeight, NBlocks, false, false,
2214  __ESIMD_DNS::block_2d_op::store>();
2215  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2216  __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::store,
2217  PropertyListT>();
2218 
2219  constexpr uint32_t cache_mask = detail::get_lsc_store_cache_mask<L1H, L2H>()
2220  << 17;
2221  constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9;
2222  constexpr uint32_t base_desc = 0x2000007;
2223 
2224  constexpr uint32_t exDesc = 0x0;
2225  constexpr uint32_t desc = base_desc | cache_mask | dataSizeMask;
2226  constexpr uint8_t execSize = 1;
2227  constexpr uint8_t sfid = 0xF;
2228  constexpr uint8_t numSrc0 = 0x1;
2229  constexpr uint8_t numSrc1 = (N * sizeof(T)) / 64;
2230 
2231  __ESIMD_NS::raw_sends<execSize, sfid, numSrc0, numSrc1>(
2232  payload.get_raw_data(), Data, exDesc, desc);
2233 }
2234 
2235 namespace detail {
2236 
2237 // lsc_atomic_update() operations may share atomic_op values for data types
2238 // of the same (fp vs integral) class for convenience (e.g. re-use 'fmax' for
2239 // all FP types). In fact those data types may require using different internal
2240 // opcodes. This function returns the corresponding internal opcode for
2241 // the input type 'T' and operation 'Op'.
2242 template <typename T, __ESIMD_NS::atomic_op Op>
2243 constexpr int lsc_to_internal_atomic_op() {
2244  constexpr __ESIMD_NS::native::lsc::atomic_op LSCOp =
2245  __ESIMD_DNS::to_lsc_atomic_op<Op>();
2246  return static_cast<int>(LSCOp);
2247 }
2248 } // namespace detail
2249 
2263 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2264  lsc_data_size DS = lsc_data_size::default_size>
2265 __ESIMD_API __ESIMD_NS::simd<T, N>
2266 lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
2267  __ESIMD_NS::simd_mask<N> pred) {
2268  return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, pred);
2269 }
2270 
2285 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2286  lsc_data_size DS = lsc_data_size::default_size>
2287 __ESIMD_API __ESIMD_NS::simd<T, N>
2288 lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
2289  __ESIMD_NS::simd<T, N> src0,
2290  __ESIMD_NS::simd_mask<N> pred) {
2291  return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, src0, pred);
2292 }
2293 
2309 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2310  lsc_data_size DS = lsc_data_size::default_size>
2311 __ESIMD_API __ESIMD_NS::simd<T, N>
2312 lsc_slm_atomic_update(__ESIMD_NS::simd<uint32_t, N> offsets,
2313  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2314  __ESIMD_NS::simd_mask<N> pred) {
2315  return __ESIMD_DNS::slm_atomic_update_impl<Op, T, N, DS>(offsets, src0, src1,
2316  pred);
2317 }
2318 
2333 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2334  lsc_data_size DS = lsc_data_size::default_size,
2336  typename Toffset>
2337 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0,
2338  __ESIMD_NS::simd<T, N>>
2339 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
2340  __ESIMD_NS::simd_mask<N> pred) {
2341  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2342  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2343  p, offsets, pred);
2344 }
2345 
2346 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2347  lsc_data_size DS = lsc_data_size::default_size,
2349  typename Toffset>
2350 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2351  __ESIMD_DNS::get_num_args<Op>() == 0,
2352  __ESIMD_NS::simd<T, N>>
2353 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd_mask<N> pred = 1) {
2354  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2355  p, __ESIMD_NS::simd<Toffset, N>(offset), pred);
2356 }
2357 
2373 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2374  lsc_data_size DS = lsc_data_size::default_size,
2376  typename Toffset>
2377 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2378  __ESIMD_NS::simd<T, N>>
2379 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
2380  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
2381  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2382  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2383  p, offsets, src0, pred);
2384 }
2385 
2386 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2387  lsc_data_size DS = lsc_data_size::default_size,
2389  typename OffsetObjT, typename RegionTy>
2390 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1,
2391  __ESIMD_NS::simd<T, N>>
2392 lsc_atomic_update(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
2393  __ESIMD_NS::simd<T, N> src0,
2394  __ESIMD_NS::simd_mask<N> pred = 1) {
2395  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(), src0,
2396  pred);
2397 }
2398 
2399 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2400  lsc_data_size DS = lsc_data_size::default_size,
2402  typename Toffset>
2403 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2404  __ESIMD_DNS::get_num_args<Op>() == 1 &&
2405  ((Op != __ESIMD_NS::atomic_op::store &&
2406  Op != __ESIMD_NS::atomic_op::xchg) ||
2407  N == 1),
2408  __ESIMD_NS::simd<T, N>>
2409 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
2410  __ESIMD_NS::simd_mask<N> pred = 1) {
2411  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2412  p, __ESIMD_NS::simd<Toffset, N>(offset), src0, pred);
2413 }
2414 
2431 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2432  lsc_data_size DS = lsc_data_size::default_size,
2434  typename Toffset>
2435 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2436  __ESIMD_NS::simd<T, N>>
2437 lsc_atomic_update(T *p, __ESIMD_NS::simd<Toffset, N> offsets,
2438  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2439  __ESIMD_NS::simd_mask<N> pred) {
2440  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2441  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT, Toffset>(
2442  p, offsets, src0, src1, pred);
2443 }
2444 
2445 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2446  lsc_data_size DS = lsc_data_size::default_size,
2448  typename OffsetObjT, typename RegionTy>
2449 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2,
2450  __ESIMD_NS::simd<T, N>>
2451 lsc_atomic_update(T *p, __ESIMD_NS::simd_view<OffsetObjT, RegionTy> offsets,
2452  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2453  __ESIMD_NS::simd_mask<N> pred = 1) {
2454  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(p, offsets.read(), src0,
2455  src1, pred);
2456 }
2457 
2458 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2459  lsc_data_size DS = lsc_data_size::default_size,
2461  typename Toffset>
2462 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2463  __ESIMD_DNS::get_num_args<Op>() == 2,
2464  __ESIMD_NS::simd<T, N>>
2465 lsc_atomic_update(T *p, Toffset offset, __ESIMD_NS::simd<T, N> src0,
2466  __ESIMD_NS::simd<T, N> src1,
2467  __ESIMD_NS::simd_mask<N> pred = 1) {
2468  return lsc_atomic_update<Op, T, N, DS, L1H, L2H>(
2469  p, __ESIMD_NS::simd<Toffset, N>(offset), src0, src1, pred);
2470 }
2471 
2489 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2490  lsc_data_size DS = lsc_data_size::default_size,
2492  typename AccessorTy, typename Toffset>
2493 __ESIMD_API std::enable_if_t<
2494  __ESIMD_DNS::is_device_accessor_with_v<
2495  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_read> &&
2496  (Op == __ESIMD_NS::atomic_op::load ||
2497  __ESIMD_DNS::is_device_accessor_with_v<
2498  AccessorTy, __ESIMD_DNS::accessor_mode_cap::can_write>),
2499  __ESIMD_NS::simd<T, N>>
2500 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
2501  __ESIMD_NS::simd_mask<N> pred) {
2502  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2503  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2504  acc, offsets, pred);
2505 }
2506 
2522 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2523  lsc_data_size DS = lsc_data_size::default_size,
2525  typename AccessorTy>
2526 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2527  __ESIMD_NS::simd<T, N>>
2528 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
2529  __ESIMD_NS::simd_mask<N> pred) {
2530  return lsc_slm_atomic_update<Op, T, N, DS>(
2531  offsets + __ESIMD_DNS::localAccessorToOffset(acc), pred);
2532 }
2533 
2552 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2553  lsc_data_size DS = lsc_data_size::default_size,
2555  typename AccessorTy, typename Toffset>
2556 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2557  __ESIMD_NS::simd<T, N>>
2558 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
2559  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
2560  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2561  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2562  acc, offsets, src0, pred);
2563 }
2564 
2581 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2582  lsc_data_size DS = lsc_data_size::default_size,
2584  typename AccessorTy>
2585 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2586  __ESIMD_NS::simd<T, N>>
2587 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
2588  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd_mask<N> pred) {
2589  return lsc_slm_atomic_update<Op, T, N, DS>(
2590  offsets + __ESIMD_DNS::localAccessorToOffset(acc), src0, pred);
2591 }
2592 
2612 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2613  lsc_data_size DS = lsc_data_size::default_size,
2615  typename AccessorTy, typename Toffset>
2616 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_device_accessor_v<AccessorTy>,
2617  __ESIMD_NS::simd<T, N>>
2618 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<Toffset, N> offsets,
2619  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2620  __ESIMD_NS::simd_mask<N> pred) {
2621  using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2622  return __ESIMD_DNS::atomic_update_impl<Op, T, N, DS, PropertyListT>(
2623  acc, offsets, src0, src1, pred);
2624 }
2625 
2643 template <__ESIMD_NS::atomic_op Op, typename T, int N,
2644  lsc_data_size DS = lsc_data_size::default_size,
2646  typename AccessorTy>
2647 __ESIMD_API std::enable_if_t<__ESIMD_DNS::is_rw_local_accessor_v<AccessorTy>,
2648  __ESIMD_NS::simd<T, N>>
2649 lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
2650  __ESIMD_NS::simd<T, N> src0, __ESIMD_NS::simd<T, N> src1,
2651  __ESIMD_NS::simd_mask<N> pred) {
2652  return lsc_slm_atomic_update<Op, T, N, DS>(
2653  offsets + __ESIMD_DNS::localAccessorToOffset(acc), src0, src1, pred);
2654 }
2655 
2664 template <lsc_memory_kind Kind = lsc_memory_kind::untyped_global,
2665  lsc_fence_op FenceOp = lsc_fence_op::none,
2666  lsc_scope Scope = lsc_scope::group, int N = 16>
2667 __SYCL_DEPRECATED("use sycl::ext::intel::esimd::fence<Kind, FenceOp, Scope>()")
2668 __ESIMD_API void lsc_fence(__ESIMD_NS::simd_mask<N> pred = 1) {
2669  static_assert(
2670  Kind != lsc_memory_kind::shared_local ||
2671  (FenceOp == lsc_fence_op::none && Scope == lsc_scope::group),
2672  "SLM fence must have 'none' lsc_fence_op and 'group' scope");
2673  static_assert(Kind != lsc_memory_kind::untyped_global_low_pri,
2674  "lsc_memory_kind::untyped_global_low_pri is not supported in HW"
2675  " and/or GPU drivers");
2676  __esimd_lsc_fence<static_cast<uint8_t>(Kind), static_cast<uint8_t>(FenceOp),
2677  static_cast<uint8_t>(Scope), N>(pred.data());
2678 }
2679 
2681 
2684 
2687 
2689 __ESIMD_API int32_t get_hw_thread_id() {
2690 #ifdef __SYCL_DEVICE_ONLY__
2691  return __spirv_BuiltInGlobalHWThreadIDINTEL();
2692 #else
2693  return std::rand();
2694 #endif // __SYCL_DEVICE_ONLY__
2695 }
2697 __ESIMD_API int32_t get_subdevice_id() {
2698 #ifdef __SYCL_DEVICE_ONLY__
2699  return __spirv_BuiltInSubDeviceIDINTEL();
2700 #else
2701  return 0;
2702 #endif
2703 }
2704 
2706 
2707 } // namespace experimental::esimd
2708 
2709 namespace esimd {
2710 
2714 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2715 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2716  __ESIMD_DNS::get_num_args<Op>() == 0,
2717  simd<T, N>>
2719  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2720  p, offset, mask);
2721 }
2722 
2723 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2724  typename RegionTy>
2725 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0, simd<T, N>>
2727  simd_mask<N> mask = 1) {
2728  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2729  p, offsets, mask);
2730 }
2731 
2732 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2733 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2734  __ESIMD_DNS::get_num_args<Op>() == 0,
2735  simd<T, N>>
2736 atomic_update(T *p, Toffset offset, simd_mask<N> mask = 1) {
2737  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2738  p, offset, mask);
2739 }
2740 
2742 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2743 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2744  __ESIMD_DNS::get_num_args<Op>() == 1,
2745  simd<T, N>>
2747  simd_mask<N> mask) {
2748  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2749  p, offset, src0, mask);
2750 }
2751 
2752 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2753  typename RegionTy>
2754 __ESIMD_API __ESIMD_API
2755  std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1, simd<T, N>>
2757  simd<T, N> src0, simd_mask<N> mask = 1) {
2758  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2759  p, offsets, src0, mask);
2760 }
2761 
2762 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2763 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2764  __ESIMD_DNS::get_num_args<Op>() == 1,
2765  simd<T, N>>
2766 atomic_update(T *p, Toffset offset, simd<T, N> src0, simd_mask<N> mask = 1) {
2767  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2768  p, offset, src0, mask);
2769 }
2770 
2772 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2773 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2774  __ESIMD_DNS::get_num_args<Op>() == 2,
2775  simd<T, N>>
2777  simd_mask<N> mask) {
2778  // 2-argument lsc_atomic_update arguments order matches the standard one -
2779  // expected value first, then new value. But atomic_update uses reverse
2780  // order, hence the src1/src0 swap.
2781  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2782  p, offset, src1, src0, mask);
2783 }
2784 
2785 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2786  typename RegionTy>
2787 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2, simd<T, N>>
2789  simd<T, N> src1, simd_mask<N> mask = 1) {
2790  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2791  p, offsets, src1, src0, mask);
2792 }
2793 
2794 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset>
2795 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2796  __ESIMD_DNS::get_num_args<Op>() == 2,
2797  __ESIMD_NS::simd<T, N>>
2799  simd_mask<N> mask = 1) {
2800  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2801  p, offset, src1, src0, mask);
2802 }
2803 
2804 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2805  typename AccessorTy>
2806 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2807  __ESIMD_DNS::get_num_args<Op>() == 0 &&
2808  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2809  simd<T, N>>
2810 atomic_update(AccessorTy acc, simd<Toffset, N> offset, simd_mask<N> mask) {
2811  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2812  acc, offset, mask);
2813 }
2814 
2815 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2816  typename RegionTy, typename AccessorTy>
2817 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 0 &&
2818  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2819  simd<T, N>>
2821  simd_mask<N> mask) {
2822  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2823  acc, offsets, mask);
2824 }
2825 
2826 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2827  typename AccessorTy>
2828 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2829  __ESIMD_DNS::get_num_args<Op>() == 0 &&
2830  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2831  simd<T, N>>
2832 atomic_update(AccessorTy acc, Toffset offset, simd_mask<N> mask) {
2833  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2834  acc, offset, mask);
2835 }
2836 
2838 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2839  typename AccessorTy>
2840 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2841  __ESIMD_DNS::get_num_args<Op>() == 1 &&
2842  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2843  simd<T, N>>
2845  simd_mask<N> mask) {
2846  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2847  acc, offset, src0, mask);
2848 }
2849 
2850 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2851  typename RegionTy, typename AccessorTy>
2852 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 1 &&
2853  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2854  simd<T, N>>
2856  simd<T, N> src0, simd_mask<N> mask) {
2857  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2858  acc, offsets, src0, mask);
2859 }
2860 
2861 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2862  typename AccessorTy>
2863 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2864  __ESIMD_DNS::get_num_args<Op>() == 1 &&
2865  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2866  simd<T, N>>
2867 atomic_update(AccessorTy acc, Toffset offset, simd<T, N> src0,
2868  simd_mask<N> mask) {
2869  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2870  acc, offset, src0, mask);
2871 }
2872 
2874 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2875  typename AccessorTy>
2876 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2877  __ESIMD_DNS::get_num_args<Op>() == 2 &&
2878  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2879  simd<T, N>>
2881  simd<T, N> src1, simd_mask<N> mask) {
2882  // 2-argument lsc_atomic_update arguments order matches the standard one -
2883  // expected value first, then new value. But atomic_update uses reverse
2884  // order, hence the src1/src0 swap.
2885  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2886  acc, offset, src1, src0, mask);
2887 }
2888 
2889 template <native::lsc::atomic_op Op, typename T, int N, typename OffsetObjT,
2890  typename RegionTy, typename AccessorTy>
2891 __ESIMD_API std::enable_if_t<__ESIMD_DNS::get_num_args<Op>() == 2 &&
2892  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2893  simd<T, N>>
2896  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2897  acc, offsets, src1, src0, mask);
2898 }
2899 
2900 template <native::lsc::atomic_op Op, typename T, int N, typename Toffset,
2901  typename AccessorTy>
2902 __ESIMD_API std::enable_if_t<std::is_integral_v<Toffset> &&
2903  __ESIMD_DNS::get_num_args<Op>() == 2 &&
2904  __ESIMD_DNS::is_rw_accessor_v<AccessorTy>,
2905  __ESIMD_NS::simd<T, N>>
2906 atomic_update(AccessorTy acc, Toffset offset, simd<T, N> src0, simd<T, N> src1,
2907  simd_mask<N> mask) {
2908  return __ESIMD_ENS::lsc_atomic_update<detail::to_atomic_op<Op>(), T, N>(
2909  acc, offset, src1, src0, mask);
2910 }
2911 
2929 template <int SLMAmount> class slm_allocator {
2930  int offset;
2931 
2932 public:
2934  slm_allocator() { offset = __esimd_slm_alloc(SLMAmount); }
2935 
2937  ESIMD_INLINE int get_offset() const { return offset; }
2938 
2940  ~slm_allocator() { __esimd_slm_free(offset); }
2941 };
2942 
2943 } // namespace esimd
2944 } // namespace ext::intel
2945 } // namespace _V1
2946 } // namespace sycl
Definition: simd.hpp:1387
This class represents a reference to a sub-region of a base simd object.
Definition: simd_view.hpp:37
The main simd vector class.
Definition: simd.hpp:53
RAII-style class used to implement "semi-dynamic" SLM allocation.
Definition: memory.hpp:2929
~slm_allocator()
Releases the SLM chunk allocated in the constructor.
Definition: memory.hpp:2940
slm_allocator()
Allocates the amount of SLM which is class' template parameter.
Definition: memory.hpp:2934
ESIMD_INLINE int get_offset() const
Definition: memory.hpp:2937
Container class to hold parameters for load2d/store2d functions
Definition: memory.hpp:1847
T * get_data_pointer() const
Get a surface base address
Definition: memory.hpp:1891
config_2d_mem_access & set_x(int32_t X)
Sets top left corner X coordinate of the block
Definition: memory.hpp:2006
constexpr int32_t get_number_of_blocks() const
Get number of blocks
Definition: memory.hpp:1958
constexpr int32_t get_width() const
Get width of the block
Definition: memory.hpp:1946
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_store_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload, sycl::ext::intel::esimd::simd< T1, N > Data)
config_2d_mem_access & set_surface_width(uint32_t SurfaceWidth)
Sets surface width
Definition: memory.hpp:1976
int32_t get_x() const
Get top left corner X coordinate of the block
Definition: memory.hpp:1928
config_2d_mem_access & set_data_pointer(T *Ptr)
Sets surface base address
Definition: memory.hpp:1965
config_2d_mem_access & set_surface_height(uint32_t SurfaceHeight)
Sets surface height
Definition: memory.hpp:1986
config_2d_mem_access(const config_2d_mem_access &other)
Copy constructor
Definition: memory.hpp:1860
config_2d_mem_access & set_y(int32_t Y)
Sets top left corner Y coordinate of the block
Definition: memory.hpp:2016
config_2d_mem_access(const T *Ptr, uint32_t SurfaceWidth, uint32_t SurfaceHeight, uint32_t SurfacePitch, int32_t X, int32_t Y)
Constructor
Definition: memory.hpp:1874
config_2d_mem_access & set_surface_pitch(uint32_t SurfacePitch)
Sets surface pitch
Definition: memory.hpp:1996
constexpr int32_t get_height() const
Get height of the block
Definition: memory.hpp:1952
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION sycl::ext::intel::esimd::simd< T1, N > lsc_load_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
int32_t get_y() const
Get top left corner Y coordinate of the block
Definition: memory.hpp:1937
friend ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d(config_2d_mem_access< T1, BlockWidth1, BlockHeight1, NBlocks1 > &payload)
("use sycl::ext::intel::esimd::memory_kind") lsc_memory_kind __ESIMD_DNS::lsc_data_size lsc_data_size
The scope that lsc_fence operation should apply to Supported platforms: DG2, PVC.
Definition: common.hpp:64
sycl::ext::intel::esimd::cache_hint cache_hint
L1 or L2 cache hint kinds.
Definition: common.hpp:108
atomic_op
Represents an atomic operation.
Definition: common.hpp:160
split_barrier_action
Represents a split barrier action.
Definition: common.hpp:111
__ESIMD_API int32_t get_subdevice_id()
Get subdevice ID.
Definition: memory.hpp:2697
__ESIMD_API int32_t get_hw_thread_id()
Get HW Thread ID.
Definition: memory.hpp:2689
__ESIMD_API SZ simd< T, SZ > src1
Definition: math.hpp:179
__SYCL_DEPRECATED("Please use sycl::ext::intel::esimd::addc(carry, src0, src1);") __ESIMD_API sycl
Definition: math.hpp:500
__ESIMD_API SZ src0
Definition: math.hpp:179
__ESIMD_API void lsc_slm_block_store(uint32_t offset, sycl::ext::intel::esimd::simd< T, NElts > vals, FlagsT flags=FlagsT{})
Transposed SLM scatter with 1 channel.
Definition: memory.hpp:1406
__ESIMD_API void lsc_prefetch(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer prefetch gather.
Definition: memory.hpp:1239
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0, sycl::ext::intel::esimd::simd< T, N > > lsc_atomic_update(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
USM pointer atomic.
Definition: memory.hpp:2339
__ESIMD_API void lsc_slm_scatter(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM scatter.
Definition: memory.hpp:1386
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_load_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block load.
Definition: memory.hpp:1760
__ESIMD_API sycl::ext::intel::esimd::simd< T, N > lsc_slm_atomic_update(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred)
SLM atomic.
Definition: memory.hpp:2266
__ESIMD_API void lsc_store_2d(T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y, sycl::ext::intel::esimd::simd< T, N > Vals)
2D USM pointer block store.
Definition: memory.hpp:1831
atomic_op
LSC atomic operation codes.
Definition: common.hpp:39
__ESIMD_API sycl::ext::intel::esimd::simd< T, NElts > lsc_slm_block_load(uint32_t offset, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT flags=FlagsT{})
Transposed SLM gather with 1 channel.
Definition: memory.hpp:611
__ESIMD_API void lsc_fence(sycl::ext::intel::esimd::simd_mask< N > pred=1)
Memory fence.
Definition: memory.hpp:2668
__ESIMD_API void lsc_prefetch_2d(const T *Ptr, unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, int X, int Y)
2D USM pointer block prefetch.
Definition: memory.hpp:1794
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_slm_gather(sycl::ext::intel::esimd::simd< uint32_t, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
SLM gather.
Definition: memory.hpp:561
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT >, sycl::ext::intel::esimd::simd< T, NElts > > lsc_block_load(const T *p, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed gather with 1 channel.
Definition: memory.hpp:942
__ESIMD_API void lsc_scatter(T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd< T, N *NElts > vals, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer scatter.
Definition: memory.hpp:1437
__ESIMD_API std::enable_if_t< sycl::ext::intel::esimd::is_simd_flag_type_v< FlagsT > > lsc_block_store(T *p, sycl::ext::intel::esimd::simd< T, NElts > vals, sycl::ext::intel::esimd::simd_mask< 1 > pred=1, FlagsT={})
USM pointer transposed scatter with 1 channel.
Definition: memory.hpp:1571
__ESIMD_API sycl::ext::intel::esimd::simd< T, N *NElts > lsc_gather(const T *p, sycl::ext::intel::esimd::simd< Toffset, N > offsets, sycl::ext::intel::esimd::simd_mask< N > pred=1)
USM pointer gather.
Definition: memory.hpp:668
__ESIMD_API void named_barrier_wait(uint8_t id)
Wait on a named barrier Available only on PVC.
Definition: memory.hpp:368
__ESIMD_API std::enable_if_t<(sizeof(T) *N >=2)> wait(sycl::ext::intel::esimd::simd< T, N > value)
Create explicit scoreboard dependency to avoid device code motion across this call and preserve the v...
Definition: memory.hpp:421
__ESIMD_API void named_barrier_init()
Initialize number of named barriers for a kernel Available only on PVC.
Definition: memory.hpp:376
__ESIMD_API void named_barrier_signal(uint8_t barrier_id, uint8_t producer_consumer_mode, uint32_t num_producers, uint32_t num_consumers)
Perform signal operation for the given named barrier Available only on PVC.
Definition: memory.hpp:392
__ESIMD_API std::enable_if_t< __ESIMD_DNS::get_num_args< Op >)==0 &&__ESIMD_DNS::is_rw_local_accessor_v< AccessorT >, simd< T, N > > atomic_update(AccessorT lacc, simd< uint32_t, N > byte_offset, simd_mask< N > mask=1)
simd<T, N> atomic_update(local_accessor lacc, simd<uint32_t, N> byte_offset, simd_mask<N> pred = 1); ...
Definition: memory.hpp:5901
__ESIMD_API void split_barrier()
Generic work-group split barrier.
Definition: memory.hpp:28
__ESIMD_API std::enable_if_t< detail::is_device_accessor_with_v< AccessorT, detail::accessor_mode_cap::can_read > &&ext::oneapi::experimental::is_property_list_v< PropertyListT > > prefetch(AccessorT acc, PropertyListT props={})
template <typename T, int VS = 1, typename AccessorT, typename PropertyListT = empty_properties_t> vo...
Definition: memory.hpp:9323
__ESIMD_API void barrier()
Generic work-group barrier.
Definition: memory.hpp:7951
@ global_coherent_fence
“Commit enable” - wait for fence to complete before continuing.
Definition: memory.hpp:7888
@ local_barrier
Issue SLM memory barrier only. If not set, the memory barrier is global.
Definition: memory.hpp:7906
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_send(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw send.
Definition: memory.hpp:159
__ESIMD_API sycl::ext::intel::esimd::simd< T1, n1 > raw_sends(sycl::ext::intel::esimd::simd< T1, n1 > msgDst, sycl::ext::intel::esimd::simd< T2, n2 > msgSrc0, sycl::ext::intel::esimd::simd< T3, n3 > msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t isEOT=0, uint8_t isSendc=0, sycl::ext::intel::esimd::simd_mask< N > mask=1)
Raw sends.
Definition: memory.hpp:69
typename make_L1_L2_alignment_properties< L1H, L2H, Alignment >::type make_L1_L2_alignment_properties_t
constexpr alignment_key::value_t< K > alignment
cache_hint
L1, L2 or L3 cache hints.
ESIMD_INLINE sycl::ext::intel::esimd::simd< RT, N > lsc_format_input(sycl::ext::intel::esimd::simd< T, N > Vals)
Definition: memory.hpp:461
ESIMD_INLINE sycl::ext::intel::esimd::simd< T, N > lsc_format_ret(sycl::ext::intel::esimd::simd< T1, N > Vals)
Definition: memory.hpp:468
Definition: access.hpp:18
ValueT length(const ValueT *a, const int len)
Calculate the square root of the input array.
Definition: math.hpp:152