DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory_intrin.hpp
Go to the documentation of this file.
1 //==------------ memory_intrin.hpp - DPC++ Explicit SIMD API ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Declares Explicit SIMD intrinsics used to implement working with
9 // the SIMD classes objects.
10 //===----------------------------------------------------------------------===//
11 
13 
14 #pragma once
15 
16 #include <sycl/accessor.hpp>
20 #include <sycl/types.hpp>
21 
22 #include <cstdint>
23 
24 namespace sycl {
25 inline namespace _V1 {
26 
27 namespace ext::intel::esimd {
28 template <typename AccessorTy>
29 __ESIMD_API SurfaceIndex get_surface_index(AccessorTy acc);
30 } // namespace ext::intel::esimd
31 
32 namespace ext::intel::esimd::detail {
33 
34 // Provides access to sycl accessor class' private members.
35 class AccessorPrivateProxy {
36 public:
37  template <typename AccessorTy>
38  static auto getQualifiedPtrOrImageObj(const AccessorTy &Acc) {
39 #ifdef __SYCL_DEVICE_ONLY__
40  if constexpr (sycl::detail::acc_properties::is_image_accessor_v<AccessorTy>)
41  return Acc.getNativeImageObj();
42  else
43  return Acc.getQualifiedPtr();
44 #else // __SYCL_DEVICE_ONLY__
45  return Acc;
46 #endif // __SYCL_DEVICE_ONLY__
47  }
48 
49 #ifndef __SYCL_DEVICE_ONLY__
50  static void *getPtr(const sycl::detail::AccessorBaseHost &Acc) {
51  return Acc.getPtr();
52  }
53 #endif // __SYCL_DEVICE_ONLY__
54 };
55 
56 template <int ElemsPerAddr,
57  typename = std::enable_if_t<(ElemsPerAddr == 1 || ElemsPerAddr == 2 ||
58  ElemsPerAddr == 4)>>
59 constexpr unsigned int ElemsPerAddrEncoding() {
60  // encoding requires log2 of ElemsPerAddr
61  if constexpr (ElemsPerAddr == 1)
62  return 0;
63  else if constexpr (ElemsPerAddr == 2)
64  return 1;
65  else if constexpr (ElemsPerAddr == 4)
66  return 2;
67 
68  // other cases not needed since std::enable_if disallows other values
69 }
70 
71 constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) {
72  // encoding requires 2^ElemsPerAddrEncoded
73  return (1 << ElemsPerAddrEncoded);
74 }
75 
76 } // namespace ext::intel::esimd::detail
77 } // namespace _V1
78 } // namespace sycl
79 
80 // flat_read does flat-address gather
81 template <typename Ty, int N, int NumBlk = 0, int ElemsPerAddr = 0>
82 __ESIMD_INTRIN
83  __ESIMD_DNS::vector_type_t<Ty,
84  N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
85  __esimd_svm_gather(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
86  __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
87  __ESIMD_INTRIN_END;
88 
89 // flat_write does flat-address scatter
90 template <typename Ty, int N, int NumBlk = 0, int ElemsPerAddr = 0>
91 __ESIMD_INTRIN void __esimd_svm_scatter(
92  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
93  __ESIMD_DNS::vector_type_t<Ty,
94  N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
95  vals,
96  __ESIMD_DNS::simd_mask_storage_t<N> pred = 1) __ESIMD_INTRIN_END;
97 
98 // Reads a block of data from given surface at given offset.
99 template <typename Ty, int N, typename SurfIndAliasTy, int32_t IsModified = 0>
100 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
101 __esimd_oword_ld_unaligned(SurfIndAliasTy surf_ind,
102  uint32_t offset) __ESIMD_INTRIN_END;
103 
104 // Writes given block of data to a surface with given index at given offset.
105 template <typename Ty, int N, typename SurfIndAliasTy>
106 __ESIMD_INTRIN void
107 __esimd_oword_st(SurfIndAliasTy surf_ind, uint32_t owords_offset,
108  __ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;
109 
110 // Read a block of data from the given address.
111 template <typename Ty, int N, size_t Align>
112 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_svm_block_ld(
113  const __ESIMD_DNS::vector_type_t<Ty, N> *addr) __ESIMD_INTRIN_END;
114 
115 // flat_block_write writes a block of data using one flat address
116 template <typename Ty, int N, size_t Align>
117 __ESIMD_INTRIN void
118 __esimd_slm_block_st(uint32_t offset,
119  __ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;
120 
138 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
139  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
140  __ESIMD_DNS::lsc_vector_size VS,
141  __ESIMD_DNS::lsc_data_order _Transposed, int N>
142 __ESIMD_INTRIN void __esimd_lsc_store_slm(
143  __ESIMD_DNS::simd_mask_storage_t<N> pred,
144  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
145  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> vals)
146  __ESIMD_INTRIN_END;
147 
164 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
165  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
166  __ESIMD_DNS::lsc_vector_size VS,
167  __ESIMD_DNS::lsc_data_order _Transposed, int N>
168 __ESIMD_INTRIN void __esimd_lsc_prefetch_stateless(
169  __ESIMD_DNS::simd_mask_storage_t<N> pred,
170  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs) __ESIMD_INTRIN_END;
171 
190 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
191  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
192  __ESIMD_DNS::lsc_vector_size VS,
193  __ESIMD_DNS::lsc_data_order Transposed, int N,
194  typename SurfIndAliasTy>
195 __ESIMD_INTRIN void
196 __esimd_lsc_prefetch_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
197  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
198  SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
199 
200 // Read a block of data from SLM at the given offset.
201 template <typename Ty, int N, size_t Align>
202 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
203 __esimd_slm_block_ld(uint32_t offset) __ESIMD_INTRIN_END;
204 
205 // flat_block_write writes a block of data using one flat address
206 template <typename Ty, int N, size_t Align>
207 __ESIMD_INTRIN void
208 __esimd_svm_block_st(__ESIMD_DNS::vector_type_t<Ty, N> *addr,
209  __ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;
210 
231 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
232  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
233  __ESIMD_DNS::lsc_vector_size VS,
234  __ESIMD_DNS::lsc_data_order _Transposed, int N>
235 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
236 __esimd_lsc_load_merge_slm(
237  __ESIMD_DNS::simd_mask_storage_t<N> pred,
238  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
239  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> pass_thru)
240  __ESIMD_INTRIN_END;
241 
245 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
246  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
247  __ESIMD_DNS::lsc_vector_size VS,
248  __ESIMD_DNS::lsc_data_order _Transposed, int N>
249 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
250 __esimd_lsc_load_slm(__ESIMD_DNS::simd_mask_storage_t<N> pred,
251  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets)
252  __ESIMD_INTRIN_END;
253 
254 // Gather data from the given global or private addresses.
255 template <typename T, int N, size_t Align>
256 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N> __esimd_gather_ld(
257  __ESIMD_DNS::vector_type_t<uint64_t, N> vptr,
258  __ESIMD_DNS::simd_mask_storage_t<N> pred,
259  __ESIMD_DNS::vector_type_t<T, N> pass_thru) __ESIMD_INTRIN_END;
260 
261 // Gather data from the given SLM addresses.
262 template <typename T, int N, size_t Align>
263 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N> __esimd_slm_gather_ld(
264  __ESIMD_DNS::vector_type_t<uint32_t, N> vptr,
265  __ESIMD_DNS::simd_mask_storage_t<N> pred,
266  __ESIMD_DNS::vector_type_t<T, N> pass_thru) __ESIMD_INTRIN_END;
267 
268 // Scatter data to given global or private addresses.
269 template <typename T, int N, size_t Align>
270 __ESIMD_INTRIN void
271 __esimd_scatter_st(__ESIMD_DNS::vector_type_t<T, N> vals,
272  __ESIMD_DNS::vector_type_t<uint64_t, N> vptr,
273  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
274 
275 // Scatter data to given SLM addresses.
276 template <typename T, int N, size_t Align>
277 __ESIMD_INTRIN void __esimd_slm_scatter_st(
278  __ESIMD_DNS::vector_type_t<T, N> vals,
279  __ESIMD_DNS::vector_type_t<uint32_t, N> vptr,
280  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
281 
306 template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
307  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
308  __ESIMD_DNS::lsc_vector_size VS,
309  __ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
310 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()>
311 __esimd_lsc_load_merge_bti(
312  __ESIMD_DNS::simd_mask_storage_t<N> pred,
313  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets, SurfIndAliasT surf_ind,
314  __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()> PassThru)
315  __ESIMD_INTRIN_END;
316 
320 template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
321  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
322  __ESIMD_DNS::lsc_vector_size VS,
323  __ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
324 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()>
325 __esimd_lsc_load_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
326  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
327  SurfIndAliasT surf_ind) __ESIMD_INTRIN_END;
328 
329 // flat_read4 does flat-address gather4
330 template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask>
331 __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)>
332  __ESIMD_INTRIN __esimd_svm_gather4_scaled(
333  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
334  __ESIMD_DNS::simd_mask_storage_t<N> pred = 1) __ESIMD_INTRIN_END;
335 
336 // flat_write does flat-address scatter
337 template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask>
338 __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
339  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
340  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> vals,
341  __ESIMD_DNS::simd_mask_storage_t<N> pred = 1) __ESIMD_INTRIN_END;
342 
343 // Low-level surface-based scatter. Writes elements of a \ref simd object into a
344 // surface at given offsets. Element can be a 1, 2 or 4-byte value, but it is
345 // always represented as a 4-byte value within the input simd object,
346 // unused (not written) upper bytes are ignored.
347 // Template (compile-time constant) parameters:
348 // @tparam Ty - element type; can only be a 4-byte integer or \c float,
349 // @tparam N - the number of elements to write
350 // @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
351 // accessor used to denote the surface
352 // @tparam TySizeLog2 - Log2 of the number of bytes written per element:
353 // 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
354 // @tparam Scale - offset scale; only 0 is supported for now
355 //
356 // Formal parameters:
357 // @param pred - per-element predicates; elements with zero corresponding
358 // predicates are not written
359 // @param surf_ind - the surface index, taken from the SYCL memory object
360 // @param global_offset - offset added to each individual element's offset to
361 // compute actual memory access offset for that element
362 // @param elem_offsets - per-element offsets
363 // @param vals - values to write
364 //
365 template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
366  int16_t Scale = 0>
367 __ESIMD_INTRIN void __esimd_scatter_scaled(
368  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
369  uint32_t global_offset,
370  __ESIMD_DNS::vector_type_t<uint32_t, N> elem_offsets,
371  __ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;
372 
373 // flat_atomic: flat-address atomic
374 template <__ESIMD_NS::atomic_op Op, typename Ty, int N>
375 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_svm_atomic0(
376  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
377  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
378 
379 template <__ESIMD_NS::atomic_op Op, typename Ty, int N>
380 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_svm_atomic1(
381  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
382  __ESIMD_DNS::vector_type_t<Ty, N> src0,
383  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
384 
385 template <__ESIMD_NS::atomic_op Op, typename Ty, int N>
386 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_svm_atomic2(
387  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
388  __ESIMD_DNS::vector_type_t<Ty, N> src0,
389  __ESIMD_DNS::vector_type_t<Ty, N> src1,
390  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
391 
407 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
408  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
409  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
410  __ESIMD_DNS::lsc_data_order Transposed, int N>
411 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
412 __esimd_lsc_xatomic_stateless_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
413  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs)
414  __ESIMD_INTRIN_END;
415 
429 
433 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
434  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
435  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
436  __ESIMD_DNS::lsc_data_order Transposed, int N>
437 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
438 __esimd_lsc_xatomic_stateless_1(
439  __ESIMD_DNS::simd_mask_storage_t<N> pred,
440  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs,
441  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0)
442  __ESIMD_INTRIN_END;
443 
461 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
462  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
463  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
464  __ESIMD_DNS::lsc_data_order Transposed, int N>
465 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
466 __esimd_lsc_xatomic_stateless_2(
467  __ESIMD_DNS::simd_mask_storage_t<N> Pred,
468  __ESIMD_DNS::vector_type_t<uintptr_t, N> Addrs,
469  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0,
470  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src1)
471  __ESIMD_INTRIN_END;
472 
490 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
491  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
492  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
493  __ESIMD_DNS::lsc_data_order Transposed, int N,
494  typename SurfIndAliasTy>
495 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
496 __esimd_lsc_xatomic_bti_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
497  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
498  SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
499 
518 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
519  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
520  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
521  __ESIMD_DNS::lsc_data_order _Transposed, int N,
522  typename SurfIndAliasTy>
523 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
524 __esimd_lsc_xatomic_bti_1(
525  __ESIMD_DNS::simd_mask_storage_t<N> pred,
526  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
527  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0,
528  SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
529 
549 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
550  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
551  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
552  __ESIMD_DNS::lsc_data_order Transposed, int N,
553  typename SurfIndAliasTy>
554 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
555 __esimd_lsc_xatomic_bti_2(
556  __ESIMD_DNS::simd_mask_storage_t<N> pred,
557  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
558  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0,
559  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src1,
560  SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
561 
577 template <typename Ty, int InternalOpOp, __ESIMD_NS::cache_hint L1H,
578  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
579  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
580  __ESIMD_DNS::lsc_data_order Transposed, int N>
581 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
582 __esimd_lsc_xatomic_slm_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
583  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets)
584  __ESIMD_INTRIN_END;
585 
602 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
603  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
604  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
605  __ESIMD_DNS::lsc_data_order Transposed, int N>
606 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
607 __esimd_lsc_xatomic_slm_1(
608  __ESIMD_DNS::simd_mask_storage_t<N> pred,
609  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
610  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0)
611  __ESIMD_INTRIN_END;
612 
630 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
631  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
632  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
633  __ESIMD_DNS::lsc_data_order Transposed, int N>
634 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
635 __esimd_lsc_xatomic_slm_2(
636  __ESIMD_DNS::simd_mask_storage_t<N> pred,
637  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
638  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0,
639  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src1)
640  __ESIMD_INTRIN_END;
641 
642 __ESIMD_INTRIN void __esimd_slm_init(uint32_t size) __ESIMD_INTRIN_END;
643 
644 // esimd_barrier, generic group barrier
645 __ESIMD_INTRIN void __esimd_barrier() __ESIMD_INTRIN_END;
646 
647 // slm_fence sets the SLM read/write order
648 __ESIMD_INTRIN void __esimd_fence(uint8_t cntl) __ESIMD_INTRIN_END;
649 
658 template <uint8_t Kind, uint8_t FenceOp, uint8_t Scope, int N>
659 __ESIMD_INTRIN void
660 __esimd_lsc_fence(__ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
661 
662 // Predicated (masked) scaled gather from a surface.
663 //
664 // Template (compile-time constant) parameters:
665 // @tparam Ty - element type
666 // @tparam N - the number of elements to read
667 // @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
668 // accessor used to denote the surface
669 // @tparam TySizeLog2 - Log2 of the number of bytes written per element:
670 // 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
671 // @tparam Scale - offset scale; only 0 is supported for now
672 //
673 // Formal parameters:
674 // @param surf_ind - the surface index, taken from the SYCL memory object
675 // @param global_offset - offset added to each individual element's offset to
676 // compute actual memory access offset for that element
677 // @param offsets - per-element offsets
678 // @param pred - per-element predicates; elements with zero corresponding
679 // predicates are not written
680 // @return - elements read ("gathered") from memory
681 
682 template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
683  int16_t Scale = 0>
684 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_gather_masked_scaled2(
685  SurfIndAliasTy surf_ind, uint32_t global_offset,
686  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
687  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
688 
689 // Reads a block of data from given surface at given `offset` counted
690 // in 16-byte chunks.
691 template <typename Ty, int N, typename SurfIndAliasTy, int32_t IsModified = 0>
692 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
693 __esimd_oword_ld(SurfIndAliasTy surf_ind,
694  uint32_t owords_offset) __ESIMD_INTRIN_END;
695 
696 // gather4 scaled masked from a surface/SLM
697 template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask,
698  typename SurfIndAliasTy, int16_t Scale = 0>
699 __ESIMD_INTRIN
700  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)>
701  __esimd_gather4_masked_scaled2(
702  SurfIndAliasTy surf_ind, int global_offset,
703  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
704  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
705 
706 // scatter4 scaled to a surface/SLM
707 template <typename Ty, int N, typename SurfIndAliasTy,
708  __ESIMD_NS::rgba_channel_mask Mask, int16_t Scale = 0>
709 __ESIMD_INTRIN void __esimd_scatter4_scaled(
710  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
711  int global_offset, __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
712  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> vals)
713  __ESIMD_INTRIN_END;
714 
715 // Surface-based atomic operations
716 template <__ESIMD_NS::atomic_op Op, typename Ty, int N, typename SurfIndAliasTy>
717 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_dword_atomic0(
718  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
719  __ESIMD_DNS::vector_type_t<uint32_t, N> addrs) __ESIMD_INTRIN_END;
720 
721 template <__ESIMD_NS::atomic_op Op, typename Ty, int N, typename SurfIndAliasTy>
722 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_dword_atomic1(
723  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
724  __ESIMD_DNS::vector_type_t<uint32_t, N> addrs,
725  __ESIMD_DNS::vector_type_t<Ty, N> src0) __ESIMD_INTRIN_END;
726 
727 template <__ESIMD_NS::atomic_op Op, typename Ty, int N, typename SurfIndAliasTy>
728 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_dword_atomic2(
729  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
730  __ESIMD_DNS::vector_type_t<uint32_t, N> addrs,
731  __ESIMD_DNS::vector_type_t<Ty, N> src0,
732  __ESIMD_DNS::vector_type_t<Ty, N> src1) __ESIMD_INTRIN_END;
733 
734 // Media block load.
735 //
736 // @tparam Ty the element data type.
737 // @tparam M the hight of the 2D block.
738 // @tparam N the width of the 2D block.
739 // @tparam Modifier top/bottom field surface access control.
740 // @tparam TACC type of the surface handle.
741 // @tparam Plane planar surface index.
742 // @tparam BlockWidth the width of the return block.
743 // @param handle the surface handle.
744 // @param x X-coordinate of the left upper rectangle corner in BYTES.
745 // @param y Y-coordinate of the left upper rectangle corner in ROWS.
746 //
747 // @return the linearized 2D block data read from surface.
748 //
749 template <typename Ty, int M, int N, int Modifier, typename TACC, int Plane,
750  int BlockWidth>
751 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, M * N>
752 __esimd_media_ld(TACC handle, unsigned x, unsigned y) __ESIMD_INTRIN_END;
753 
754 // Media block store
755 //
756 // @tparam Ty the element data type.
757 // @tparam M the hight of the 2D block.
758 // @tparam N the width of the 2D block.
759 // @tparam Modifier top/bottom field surface access control.
760 // @tparam TACC type of the surface handle.
761 // @tparam Plane planar surface index.
762 // @tparam BlockWidth the width of the return block.
763 // @param handle the surface handle.
764 // @param x X-coordinate of the left upper rectangle corner in BYTES.
765 // @param y Y-coordinate of the left upper rectangle corner in ROWS.
766 // @param vals the linearized 2D block data to be written to surface.
767 //
768 template <typename Ty, int M, int N, int Modifier, typename TACC, int Plane,
769  int BlockWidth>
770 __ESIMD_INTRIN void
771 __esimd_media_st(TACC handle, unsigned x, unsigned y,
772  __ESIMD_DNS::vector_type_t<Ty, M * N> vals) __ESIMD_INTRIN_END;
773 
774 // \brief Converts given value to a surface index.
775 // The input must always be a result of
776 // detail::AccessorPrivateProxy::getQualifiedPtrOrImageObj(acc)
777 // where acc is a buffer or image accessor. If the result is, say, 'obj', then
778 // 'obj' is really a value of the surface index kept in a differently typed
779 // accessor field. Front-end compilation time type of 'obj' is either
780 // ConcreteASPtrType (detail::DecoratedType<DataT, AS>::type *), for a buffer
781 // or
782 // image{1,2,3}d_t OpenCL type for an image
783 // But when doing code generation, FE replaces e.g. '__read_only image2d_t' FE
784 // type with '%opencl.image2d_ro_t addrspace(1) *' LLVM type or a Target
785 // Extension Type if using opaque pointers. These types can neither be
786 // reinterpret_cast'ed from pointer to intptr_t (because they are not a pointer
787 // at FE translation time), nor can they be bit_cast'ed to intptr_t (because
788 // they are not trivially copyable). This function takes advantage of the fact
789 // that in SPIR-V 'obj' is always a pointer, where we can do ptr to uint32_t
790 // conversion. This function can be called only from the device code, as
791 // accessor => memory handle translation for host is different.
792 // @param acc the SYCL accessor.
793 // Returns the binding table index value.
794 template <typename MemObjTy>
795 ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj) {
796 #ifdef __SYCL_DEVICE_ONLY__
797  return __spirv_ConvertPtrToU<MemObjTy, uint32_t>(obj);
798 #else // __SYCL_DEVICE_ONLY__
799  __ESIMD_UNSUPPORTED_ON_HOST;
800 #endif // __SYCL_DEVICE_ONLY__
801 }
802 
823 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
824  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
826  __ESIMD_DNS::lsc_data_order Transposed, int N>
827 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
828 __esimd_lsc_load_merge_stateless(
829  __ESIMD_DNS::simd_mask_storage_t<N> pred,
830  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs,
831  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> pass_thru = 0)
832  __ESIMD_INTRIN_END;
833 
851 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
852  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
853  __ESIMD_DNS::lsc_vector_size VS,
854  __ESIMD_DNS::lsc_data_order _Transposed, int N>
855 __ESIMD_INTRIN void __esimd_lsc_store_stateless(
856  __ESIMD_DNS::simd_mask_storage_t<N> pred,
857  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs,
858  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> vals)
859  __ESIMD_INTRIN_END;
860 
880 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
881  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
882  __ESIMD_DNS::lsc_vector_size VS,
883  __ESIMD_DNS::lsc_data_order _Transposed, int N,
884  typename SurfIndAliasTy>
885 __ESIMD_INTRIN void __esimd_lsc_store_bti(
886  __ESIMD_DNS::simd_mask_storage_t<N> pred,
887  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
888  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> vals,
889  SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
890 
891 // \brief Raw sends.
892 //
893 // @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
894 //
895 // @param execSize the execution size, which must be a compile time constant.
896 //
897 // @param pred the predicate to specify enabled channels.
898 //
899 // @param numSrc0 the number of GRFs for source-0, which must be a compile time
900 // constant.
901 //
902 // @param numSrc1 the number of GRFs for source-1, which must be a compile time
903 // constant.
904 //
905 // @param numDst the number of GRFs for destination, which must be a compile
906 // time constant.
907 //
908 // @param sfid the shared function ID, which must be a compile time constant.
909 //
910 // @param exDesc the extended message descriptor.
911 //
912 // @param msgDesc the message descriptor.
913 //
914 // @param msgSrc0 the first source operand of send message.
915 //
916 // @param msgSrc1 the second source operand of send message.
917 //
918 // @param msgDst the destination operand of send message.
919 //
920 // Returns a simd vector of type Ty1 and size N1.
921 //
922 template <typename Ty1, int N1, typename Ty2, int N2, typename Ty3, int N3,
923  int N = 16>
924 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty1, N1> __esimd_raw_sends2(
925  uint8_t modifier, uint8_t execSize,
926  __ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0, uint8_t numSrc1,
927  uint8_t numDst, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc,
928  __ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc0,
929  __ESIMD_DNS::vector_type_t<Ty3, N3> msgSrc1,
930  __ESIMD_DNS::vector_type_t<Ty1, N1> msgDst) __ESIMD_INTRIN_END;
931 
932 // \brief Raw send.
933 //
934 // @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
935 //
936 // @param execSize the execution size, which must be a compile time constant.
937 //
938 // @param pred the predicate to specify enabled channels.
939 //
940 // @param numSrc0 the number of GRFs for source-0, which must be a compile time
941 // constant.
942 //
943 // @param numDst the number of GRFs for destination, which must be a compile
944 // time constant.
945 //
946 // @param sfid the shared function ID, which must be a compile time constant.
947 //
948 // @param exDesc the extended message descriptor.
949 //
950 // @param msgDesc the message descriptor.
951 //
952 // @param msgSrc0 the first source operand of send message.
953 //
954 // @param msgDst the destination operand of send message.
955 //
956 // Returns a simd vector of type Ty1 and size N1.
957 //
958 template <typename Ty1, int N1, typename Ty2, int N2, int N = 16>
959 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty1, N1> __esimd_raw_send2(
960  uint8_t modifier, uint8_t execSize,
961  __ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0, uint8_t numDst,
962  uint8_t sfid, uint32_t exDesc, uint32_t msgDesc,
963  __ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc0,
964  __ESIMD_DNS::vector_type_t<Ty1, N1> msgDst) __ESIMD_INTRIN_END;
965 
966 // \brief Raw sends.
967 //
968 // @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
969 //
970 // @param execSize the execution size, which must be a compile time constant.
971 //
972 // @param pred the predicate to specify enabled channels.
973 //
974 // @param numSrc0 the number of GRFs for source-0, which must be a compile time
975 // constant.
976 //
977 // @param numSrc1 the number of GRFs for source-1, which must be a compile time
978 // constant.
979 //
980 // @param sfid the shared function ID, which must be a compile time constant.
981 //
982 // @param exDesc the extended message descriptor.
983 //
984 // @param msgDesc the message descriptor.
985 //
986 // @param msgSrc0 the first source operand of send message.
987 //
988 // @param msgSrc1 the second source operand of send message.
989 //
990 template <typename Ty1, int N1, typename Ty2, int N2, int N = 16>
991 __ESIMD_INTRIN void __esimd_raw_sends2_noresult(
992  uint8_t modifier, uint8_t execSize,
993  __ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0, uint8_t numSrc1,
994  uint8_t sfid, uint32_t exDesc, uint32_t msgDesc,
995  __ESIMD_DNS::vector_type_t<Ty1, N1> msgSrc0,
996  __ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc1) __ESIMD_INTRIN_END;
997 
998 // \brief Raw send.
999 //
1000 // @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
1001 //
1002 // @param execSize the execution size, which must be a compile time constant.
1003 //
1004 // @param pred the predicate to specify enabled channels.
1005 //
1006 // @param numSrc0 the number of GRFs for source-0, which must be a compile time
1007 // constant.
1008 //
1009 // @param sfid the shared function ID, which must be a compile time constant.
1010 //
1011 // @param exDesc the extended message descriptor.
1012 //
1013 // @param msgDesc the message descriptor.
1014 //
1015 // @param msgSrc0 the first source operand of send message.
1016 //
1017 template <typename Ty1, int N1, int N = 16>
1018 __ESIMD_INTRIN void __esimd_raw_send2_noresult(
1019  uint8_t modifier, uint8_t execSize,
1020  __ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0, uint8_t sfid,
1021  uint32_t exDesc, uint32_t msgDesc,
1022  __ESIMD_DNS::vector_type_t<Ty1, N1> msgSrc0) __ESIMD_INTRIN_END;
1023 
1054 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
1055  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_data_order Transposed,
1056  uint8_t NBlocks, int BlockWidth, int BlockHeight, bool Transformed,
1057  int N>
1058 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
1059 __esimd_lsc_load2d_stateless(__ESIMD_DNS::simd_mask_storage_t<N> Pred,
1060  uintptr_t Ptr, int SurfaceWidth, int SurfaceHeight,
1061  int SurfacePitch, int X, int Y) __ESIMD_INTRIN_END;
1062 
1087 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
1088  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_data_order Transposed,
1089  uint8_t NBlocks, int BlockWidth, int BlockHeight, bool Transformed,
1090  int N>
1091 __ESIMD_INTRIN void __esimd_lsc_prefetch2d_stateless(
1092  __ESIMD_DNS::simd_mask_storage_t<N> Pred, uintptr_t Ptr, int SurfaceWidth,
1093  int SurfaceHeight, int SurfacePitch, int X, int Y) __ESIMD_INTRIN_END;
1094 
1124 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
1125  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_data_order Transposed,
1126  uint8_t NBlocks, int BlockWidth, int BlockHeight, bool Transformed,
1127  int N>
1128 __ESIMD_INTRIN void __esimd_lsc_store2d_stateless(
1129  __ESIMD_DNS::simd_mask_storage_t<N> Pred, uintptr_t Ptr, int SurfaceWidth,
1130  int SurfaceHeight, int SurfacePitch, int X, int Y,
1131  __ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;
1132 
The file contains implementations of accessor class.
rgba_channel_mask
Represents a pixel's channel mask - all possible combinations of enabled channels.
Definition: common.hpp:122
unsigned int SurfaceIndex
Surface index type.
Definition: common.hpp:64
constexpr int get_num_channels_enabled(rgba_channel_mask M)
Definition: common.hpp:145
atomic_op
Represents an atomic operation.
Definition: common.hpp:160
__ESIMD_API SZ simd< T, SZ > src1
Definition: math.hpp:179
__ESIMD_API SZ src0
Definition: math.hpp:179
__ESIMD_API SurfaceIndex get_surface_index(AccessorTy acc)
Get surface index corresponding to a SYCL accessor.
Definition: memory.hpp:53
lsc_data_size
Data size or format to read or store.
Definition: common.hpp:407
cache_hint
L1, L2 or L3 cache hints.
constexpr if(sizeof(T)==8)
return(x >> one)+(y >> one)+((y &x) &one)
autodecltype(x) x
Definition: access.hpp:18