DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory_intrin.hpp
Go to the documentation of this file.
1 //==------------ memory_intrin.hpp - DPC++ Explicit SIMD API ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Declares Explicit SIMD intrinsics used to implement working with
9 // the SIMD classes objects.
10 //===----------------------------------------------------------------------===//
11 
13 
14 #pragma once
15 
16 #include <sycl/accessor.hpp>
20 #include <sycl/types.hpp>
21 
22 #include <cstdint>
23 
24 namespace sycl {
25 inline namespace _V1 {
26 
27 namespace ext::intel::esimd {
28 template <typename AccessorTy>
29 __ESIMD_API SurfaceIndex get_surface_index(AccessorTy acc);
30 } // namespace ext::intel::esimd
31 
32 namespace ext::intel::esimd::detail {
33 
34 // Provides access to sycl accessor class' private members.
35 class AccessorPrivateProxy {
36 public:
37  template <typename AccessorTy>
38  static auto getQualifiedPtrOrImageObj(const AccessorTy &Acc) {
39 #ifdef __SYCL_DEVICE_ONLY__
40  if constexpr (sycl::detail::acc_properties::is_image_accessor_v<AccessorTy>)
41  return Acc.getNativeImageObj();
42  else
43  return Acc.getQualifiedPtr();
44 #else // __SYCL_DEVICE_ONLY__
45  return Acc;
46 #endif // __SYCL_DEVICE_ONLY__
47  }
48 
49 #ifndef __SYCL_DEVICE_ONLY__
50  static void *getPtr(const sycl::detail::AccessorBaseHost &Acc) {
51  return Acc.getPtr();
52  }
53 #endif // __SYCL_DEVICE_ONLY__
54 };
55 
56 template <int ElemsPerAddr,
57  typename = std::enable_if_t<(ElemsPerAddr == 1 || ElemsPerAddr == 2 ||
58  ElemsPerAddr == 4)>>
59 constexpr unsigned int ElemsPerAddrEncoding() {
60  // encoding requires log2 of ElemsPerAddr
61  if constexpr (ElemsPerAddr == 1)
62  return 0;
63  else if constexpr (ElemsPerAddr == 2)
64  return 1;
65  else if constexpr (ElemsPerAddr == 4)
66  return 2;
67 
68  // other cases not needed since std::enable_if disallows other values
69 }
70 
71 constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) {
72  // encoding requires 2^ElemsPerAddrEncoded
73  return (1 << ElemsPerAddrEncoded);
74 }
75 
76 } // namespace ext::intel::esimd::detail
77 } // namespace _V1
78 } // namespace sycl
79 
80 // flat_read does flat-address gather
81 template <typename Ty, int N, int NumBlk = 0, int ElemsPerAddr = 0>
82 __ESIMD_INTRIN
83  __ESIMD_DNS::vector_type_t<Ty,
84  N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
85  __esimd_svm_gather(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
86  __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
87  __ESIMD_INTRIN_END;
88 
89 // flat_write does flat-address scatter
90 template <typename Ty, int N, int NumBlk = 0, int ElemsPerAddr = 0>
91 __ESIMD_INTRIN void __esimd_svm_scatter(
92  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
93  __ESIMD_DNS::vector_type_t<Ty,
94  N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
95  vals,
96  __ESIMD_DNS::simd_mask_storage_t<N> pred = 1) __ESIMD_INTRIN_END;
97 
98 // Reads a block of data from given surface at given offset.
99 template <typename Ty, int N, typename SurfIndAliasTy, int32_t IsModified = 0>
100 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
101 __esimd_oword_ld_unaligned(SurfIndAliasTy surf_ind,
102  uint32_t offset) __ESIMD_INTRIN_END;
103 
104 // Writes given block of data to a surface with given index at given offset.
105 template <typename Ty, int N, typename SurfIndAliasTy>
106 __ESIMD_INTRIN void
107 __esimd_oword_st(SurfIndAliasTy surf_ind, uint32_t owords_offset,
108  __ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;
109 
110 // Read a block of data from the given address.
111 template <typename Ty, int N, size_t Align>
112 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_svm_block_ld(
113  const __ESIMD_DNS::vector_type_t<Ty, N> *addr) __ESIMD_INTRIN_END;
114 
115 // flat_block_write writes a block of data using one flat address
116 template <typename Ty, int N, size_t Align>
117 __ESIMD_INTRIN void
118 __esimd_slm_block_st(uint32_t offset,
119  __ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;
120 
138 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
139  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
140  __ESIMD_DNS::lsc_vector_size VS,
141  __ESIMD_DNS::lsc_data_order _Transposed, int N>
142 __ESIMD_INTRIN void __esimd_lsc_store_slm(
143  __ESIMD_DNS::simd_mask_storage_t<N> pred,
144  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
145  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> vals)
146  __ESIMD_INTRIN_END;
147 
148 // Read a block of data from SLM at the given offset.
149 template <typename Ty, int N, size_t Align>
150 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
151 __esimd_slm_block_ld(uint32_t offset) __ESIMD_INTRIN_END;
152 
153 // flat_block_write writes a block of data using one flat address
154 template <typename Ty, int N, size_t Align>
155 __ESIMD_INTRIN void
156 __esimd_svm_block_st(__ESIMD_DNS::vector_type_t<Ty, N> *addr,
157  __ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;
158 
179 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
180  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
181  __ESIMD_DNS::lsc_vector_size VS,
182  __ESIMD_DNS::lsc_data_order _Transposed, int N>
183 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
184 __esimd_lsc_load_merge_slm(
185  __ESIMD_DNS::simd_mask_storage_t<N> pred,
186  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
187  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> pass_thru)
188  __ESIMD_INTRIN_END;
189 
193 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
194  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
195  __ESIMD_DNS::lsc_vector_size VS,
196  __ESIMD_DNS::lsc_data_order _Transposed, int N>
197 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
198 __esimd_lsc_load_slm(__ESIMD_DNS::simd_mask_storage_t<N> pred,
199  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets)
200  __ESIMD_INTRIN_END;
201 
202 // Gather data from the given global or private addresses.
203 template <typename T, int N, size_t Align>
204 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N> __esimd_gather_ld(
205  __ESIMD_DNS::vector_type_t<uint64_t, N> vptr,
206  __ESIMD_DNS::simd_mask_storage_t<N> pred,
207  __ESIMD_DNS::vector_type_t<T, N> pass_thru) __ESIMD_INTRIN_END;
208 
209 // Gather data from the given SLM addresses.
210 template <typename T, int N, size_t Align>
211 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N> __esimd_slm_gather_ld(
212  __ESIMD_DNS::vector_type_t<uint32_t, N> vptr,
213  __ESIMD_DNS::simd_mask_storage_t<N> pred,
214  __ESIMD_DNS::vector_type_t<T, N> pass_thru) __ESIMD_INTRIN_END;
215 
216 // Scatter data to given global or private addresses.
217 template <typename T, int N, size_t Align>
218 __ESIMD_INTRIN void
219 __esimd_scatter_st(__ESIMD_DNS::vector_type_t<T, N> vals,
220  __ESIMD_DNS::vector_type_t<uint64_t, N> vptr,
221  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
222 
223 // Scatter data to given SLM addresses.
224 template <typename T, int N, size_t Align>
225 __ESIMD_INTRIN void __esimd_slm_scatter_st(
226  __ESIMD_DNS::vector_type_t<T, N> vals,
227  __ESIMD_DNS::vector_type_t<uint32_t, N> vptr,
228  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
229 
254 template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
255  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
256  __ESIMD_DNS::lsc_vector_size VS,
257  __ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
258 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()>
259 __esimd_lsc_load_merge_bti(
260  __ESIMD_DNS::simd_mask_storage_t<N> pred,
261  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets, SurfIndAliasT surf_ind,
262  __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()> PassThru)
263  __ESIMD_INTRIN_END;
264 
268 template <typename T, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
269  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
270  __ESIMD_DNS::lsc_vector_size VS,
271  __ESIMD_DNS::lsc_data_order Transposed, int N, typename SurfIndAliasT>
272 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N * __ESIMD_DNS::to_int<VS>()>
273 __esimd_lsc_load_bti(__ESIMD_DNS::simd_mask_storage_t<N> pred,
274  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
275  SurfIndAliasT surf_ind) __ESIMD_INTRIN_END;
276 
277 // flat_read4 does flat-address gather4
278 template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask>
279 __ESIMD_DNS::vector_type_t<Ty,
280  N * get_num_channels_enabled(Mask)> __ESIMD_INTRIN
281 __esimd_svm_gather4_scaled(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
282  __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
283  __ESIMD_INTRIN_END;
284 
285 // flat_write does flat-address scatter
286 template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask>
287 __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
288  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
289  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> vals,
290  __ESIMD_DNS::simd_mask_storage_t<N> pred = 1) __ESIMD_INTRIN_END;
291 
292 // Low-level surface-based scatter. Writes elements of a \ref simd object into a
293 // surface at given offsets. Element can be a 1, 2 or 4-byte value, but it is
294 // always represented as a 4-byte value within the input simd object,
295 // unused (not written) upper bytes are ignored.
296 // Template (compile-time constant) parameters:
297 // @tparam Ty - element type; can only be a 4-byte integer or \c float,
298 // @tparam N - the number of elements to write
299 // @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
300 // accessor used to denote the surface
301 // @tparam TySizeLog2 - Log2 of the number of bytes written per element:
302 // 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
303 // @tparam Scale - offset scale; only 0 is supported for now
304 //
305 // Formal parameters:
306 // @param pred - per-element predicates; elements with zero corresponding
307 // predicates are not written
308 // @param surf_ind - the surface index, taken from the SYCL memory object
309 // @param global_offset - offset added to each individual element's offset to
310 // compute actual memory access offset for that element
311 // @param elem_offsets - per-element offsets
312 // @param vals - values to write
313 //
314 template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
315  int16_t Scale = 0>
316 __ESIMD_INTRIN void __esimd_scatter_scaled(
317  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
318  uint32_t global_offset,
319  __ESIMD_DNS::vector_type_t<uint32_t, N> elem_offsets,
320  __ESIMD_DNS::vector_type_t<Ty, N> vals) __ESIMD_INTRIN_END;
321 
322 // flat_atomic: flat-address atomic
323 template <__ESIMD_NS::atomic_op Op, typename Ty, int N>
324 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_svm_atomic0(
325  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
326  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
327 
328 template <__ESIMD_NS::atomic_op Op, typename Ty, int N>
329 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_svm_atomic1(
330  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
331  __ESIMD_DNS::vector_type_t<Ty, N> src0,
332  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
333 
334 template <__ESIMD_NS::atomic_op Op, typename Ty, int N>
335 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_svm_atomic2(
336  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
337  __ESIMD_DNS::vector_type_t<Ty, N> src0,
338  __ESIMD_DNS::vector_type_t<Ty, N> src1,
339  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
340 
356 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
357  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
358  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
359  __ESIMD_DNS::lsc_data_order Transposed, int N>
360 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
361 __esimd_lsc_xatomic_stateless_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
362  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs)
363  __ESIMD_INTRIN_END;
364 
378 
382 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
383  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
384  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
385  __ESIMD_DNS::lsc_data_order Transposed, int N>
386 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
387 __esimd_lsc_xatomic_stateless_1(
388  __ESIMD_DNS::simd_mask_storage_t<N> pred,
389  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs,
390  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0)
391  __ESIMD_INTRIN_END;
392 
410 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
411  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
412  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
413  __ESIMD_DNS::lsc_data_order Transposed, int N>
414 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
415 __esimd_lsc_xatomic_stateless_2(
416  __ESIMD_DNS::simd_mask_storage_t<N> Pred,
417  __ESIMD_DNS::vector_type_t<uintptr_t, N> Addrs,
418  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0,
419  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src1)
420  __ESIMD_INTRIN_END;
421 
439 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
440  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
441  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
442  __ESIMD_DNS::lsc_data_order Transposed, int N,
443  typename SurfIndAliasTy>
444 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
445 __esimd_lsc_xatomic_bti_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
446  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
447  SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
448 
467 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
468  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
469  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
470  __ESIMD_DNS::lsc_data_order _Transposed, int N,
471  typename SurfIndAliasTy>
472 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
473 __esimd_lsc_xatomic_bti_1(
474  __ESIMD_DNS::simd_mask_storage_t<N> pred,
475  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
476  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0,
477  SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
478 
498 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
499  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
500  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
501  __ESIMD_DNS::lsc_data_order Transposed, int N,
502  typename SurfIndAliasTy>
503 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
504 __esimd_lsc_xatomic_bti_2(
505  __ESIMD_DNS::simd_mask_storage_t<N> pred,
506  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
507  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0,
508  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src1,
509  SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
510 
526 template <typename Ty, int InternalOpOp, __ESIMD_NS::cache_hint L1H,
527  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
528  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
529  __ESIMD_DNS::lsc_data_order Transposed, int N>
530 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
531 __esimd_lsc_xatomic_slm_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
532  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets)
533  __ESIMD_INTRIN_END;
534 
551 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
552  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
553  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
554  __ESIMD_DNS::lsc_data_order Transposed, int N>
555 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
556 __esimd_lsc_xatomic_slm_1(
557  __ESIMD_DNS::simd_mask_storage_t<N> pred,
558  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
559  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0)
560  __ESIMD_INTRIN_END;
561 
579 template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
580  __ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
581  __ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
582  __ESIMD_DNS::lsc_data_order Transposed, int N>
583 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
584 __esimd_lsc_xatomic_slm_2(
585  __ESIMD_DNS::simd_mask_storage_t<N> pred,
586  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
587  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0,
588  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src1)
589  __ESIMD_INTRIN_END;
590 
591 __ESIMD_INTRIN void __esimd_slm_init(uint32_t size) __ESIMD_INTRIN_END;
592 
593 // esimd_barrier, generic group barrier
594 __ESIMD_INTRIN void __esimd_barrier() __ESIMD_INTRIN_END;
595 
596 // slm_fence sets the SLM read/write order
597 __ESIMD_INTRIN void __esimd_fence(uint8_t cntl) __ESIMD_INTRIN_END;
598 
607 template <uint8_t Kind, uint8_t FenceOp, uint8_t Scope, int N>
608 __ESIMD_INTRIN void
609 __esimd_lsc_fence(__ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
610 
611 // Predicated (masked) scaled gather from a surface.
612 //
613 // Template (compile-time constant) parameters:
614 // @tparam Ty - element type
615 // @tparam N - the number of elements to read
616 // @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
617 // accessor used to denote the surface
618 // @tparam TySizeLog2 - Log2 of the number of bytes written per element:
619 // 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
620 // @tparam Scale - offset scale; only 0 is supported for now
621 //
622 // Formal parameters:
623 // @param surf_ind - the surface index, taken from the SYCL memory object
624 // @param global_offset - offset added to each individual element's offset to
625 // compute actual memory access offset for that element
626 // @param offsets - per-element offsets
627 // @param pred - per-element predicates; elements with zero corresponding
628 // predicates are not written
629 // @return - elements read ("gathered") from memory
630 
631 template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
632  int16_t Scale = 0>
633 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_gather_masked_scaled2(
634  SurfIndAliasTy surf_ind, uint32_t global_offset,
635  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
636  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
637 
638 // Reads a block of data from given surface at given `offset` counted
639 // in 16-byte chunks.
640 template <typename Ty, int N, typename SurfIndAliasTy, int32_t IsModified = 0>
641 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
642 __esimd_oword_ld(SurfIndAliasTy surf_ind,
643  uint32_t owords_offset) __ESIMD_INTRIN_END;
644 
645 // gather4 scaled masked from a surface/SLM
646 template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask,
647  typename SurfIndAliasTy, int16_t Scale = 0>
648 __ESIMD_INTRIN
649  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)>
650  __esimd_gather4_masked_scaled2(
651  SurfIndAliasTy surf_ind, int global_offset,
652  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
653  __ESIMD_DNS::simd_mask_storage_t<N> pred) __ESIMD_INTRIN_END;
654 
655 // scatter4 scaled to a surface/SLM
656 template <typename Ty, int N, typename SurfIndAliasTy,
657  __ESIMD_NS::rgba_channel_mask Mask, int16_t Scale = 0>
658 __ESIMD_INTRIN void __esimd_scatter4_scaled(
659  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
660  int global_offset, __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
661  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> vals)
662  __ESIMD_INTRIN_END;
663 
664 // Surface-based atomic operations
665 template <__ESIMD_NS::atomic_op Op, typename Ty, int N, typename SurfIndAliasTy>
666 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_dword_atomic0(
667  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
668  __ESIMD_DNS::vector_type_t<uint32_t, N> addrs) __ESIMD_INTRIN_END;
669 
670 template <__ESIMD_NS::atomic_op Op, typename Ty, int N, typename SurfIndAliasTy>
671 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_dword_atomic1(
672  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
673  __ESIMD_DNS::vector_type_t<uint32_t, N> addrs,
674  __ESIMD_DNS::vector_type_t<Ty, N> src0) __ESIMD_INTRIN_END;
675 
676 template <__ESIMD_NS::atomic_op Op, typename Ty, int N, typename SurfIndAliasTy>
677 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N> __esimd_dword_atomic2(
678  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
679  __ESIMD_DNS::vector_type_t<uint32_t, N> addrs,
680  __ESIMD_DNS::vector_type_t<Ty, N> src0,
681  __ESIMD_DNS::vector_type_t<Ty, N> src1) __ESIMD_INTRIN_END;
682 
683 // Media block load.
684 //
685 // @tparam Ty the element data type.
686 // @tparam M the hight of the 2D block.
687 // @tparam N the width of the 2D block.
688 // @tparam Modifier top/bottom field surface access control.
689 // @tparam TACC type of the surface handle.
690 // @tparam Plane planar surface index.
691 // @tparam BlockWidth the width of the return block.
692 // @param handle the surface handle.
693 // @param x X-coordinate of the left upper rectangle corner in BYTES.
694 // @param y Y-coordinate of the left upper rectangle corner in ROWS.
695 //
696 // @return the linearized 2D block data read from surface.
697 //
698 template <typename Ty, int M, int N, int Modifier, typename TACC, int Plane,
699  int BlockWidth>
700 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, M * N>
701 __esimd_media_ld(TACC handle, unsigned x, unsigned y) __ESIMD_INTRIN_END;
702 
703 // Media block store
704 //
705 // @tparam Ty the element data type.
706 // @tparam M the hight of the 2D block.
707 // @tparam N the width of the 2D block.
708 // @tparam Modifier top/bottom field surface access control.
709 // @tparam TACC type of the surface handle.
710 // @tparam Plane planar surface index.
711 // @tparam BlockWidth the width of the return block.
712 // @param handle the surface handle.
713 // @param x X-coordinate of the left upper rectangle corner in BYTES.
714 // @param y Y-coordinate of the left upper rectangle corner in ROWS.
715 // @param vals the linearized 2D block data to be written to surface.
716 //
717 template <typename Ty, int M, int N, int Modifier, typename TACC, int Plane,
718  int BlockWidth>
719 __ESIMD_INTRIN void
720 __esimd_media_st(TACC handle, unsigned x, unsigned y,
721  __ESIMD_DNS::vector_type_t<Ty, M * N> vals) __ESIMD_INTRIN_END;
722 
723 // \brief Converts given value to a surface index.
724 // The input must always be a result of
725 // detail::AccessorPrivateProxy::getQualifiedPtrOrImageObj(acc)
726 // where acc is a buffer or image accessor. If the result is, say, 'obj', then
727 // 'obj' is really a value of the surface index kept in a differently typed
728 // accessor field. Front-end compilation time type of 'obj' is either
729 // ConcreteASPtrType (detail::DecoratedType<DataT, AS>::type *), for a buffer
730 // or
731 // image{1,2,3}d_t OpenCL type for an image
732 // But when doing code generation, FE replaces e.g. '__read_only image2d_t' FE
733 // type with '%opencl.image2d_ro_t addrspace(1) *' LLVM type or a Target
734 // Extension Type if using opaque pointers. These types can neither be
735 // reinterpret_cast'ed from pointer to intptr_t (because they are not a pointer
736 // at FE translation time), nor can they be bit_cast'ed to intptr_t (because
737 // they are not trivially copyable). This function takes advantage of the fact
738 // that in SPIR-V 'obj' is always a pointer, where we can do ptr to uint32_t
739 // conversion. This function can be called only from the device code, as
740 // accessor => memory handle translation for host is different.
741 // @param acc the SYCL accessor.
742 // Returns the binding table index value.
743 template <typename MemObjTy>
744 ESIMD_INLINE __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj) {
745 #ifdef __SYCL_DEVICE_ONLY__
746  return __spirv_ConvertPtrToU<MemObjTy, uint32_t>(obj);
747 #else // __SYCL_DEVICE_ONLY__
748  __ESIMD_UNSUPPORTED_ON_HOST;
749 #endif // __SYCL_DEVICE_ONLY__
750 }
751 
772 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
773  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
775  __ESIMD_DNS::lsc_data_order Transposed, int N>
776 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
777 __esimd_lsc_load_merge_stateless(
778  __ESIMD_DNS::simd_mask_storage_t<N> pred,
779  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs,
780  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> pass_thru = 0)
781  __ESIMD_INTRIN_END;
782 
801 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
802  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
803  __ESIMD_DNS::lsc_vector_size VS,
804  __ESIMD_DNS::lsc_data_order Transposed, int N>
805 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
806 __esimd_lsc_load_stateless(__ESIMD_DNS::simd_mask_storage_t<N> pred,
807  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs)
808  __ESIMD_INTRIN_END;
809 
827 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
828  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
829  __ESIMD_DNS::lsc_vector_size VS,
830  __ESIMD_DNS::lsc_data_order _Transposed, int N>
831 __ESIMD_INTRIN void __esimd_lsc_store_stateless(
832  __ESIMD_DNS::simd_mask_storage_t<N> pred,
833  __ESIMD_DNS::vector_type_t<uintptr_t, N> addrs,
834  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> vals)
835  __ESIMD_INTRIN_END;
836 
856 template <typename Ty, __ESIMD_NS::cache_hint L1H, __ESIMD_NS::cache_hint L2H,
857  uint16_t AddressScale, int ImmOffset, __ESIMD_DNS::lsc_data_size DS,
858  __ESIMD_DNS::lsc_vector_size VS,
859  __ESIMD_DNS::lsc_data_order _Transposed, int N,
860  typename SurfIndAliasTy>
861 __ESIMD_INTRIN void __esimd_lsc_store_bti(
862  __ESIMD_DNS::simd_mask_storage_t<N> pred,
863  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
864  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> vals,
865  SurfIndAliasTy surf_ind) __ESIMD_INTRIN_END;
866 
867 // \brief Raw sends.
868 //
869 // @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
870 //
871 // @param execSize the execution size, which must be a compile time constant.
872 //
873 // @param pred the predicate to specify enabled channels.
874 //
875 // @param numSrc0 the number of GRFs for source-0, which must be a compile time
876 // constant.
877 //
878 // @param numSrc1 the number of GRFs for source-1, which must be a compile time
879 // constant.
880 //
881 // @param numDst the number of GRFs for destination, which must be a compile
882 // time constant.
883 //
884 // @param sfid the shared function ID, which must be a compile time constant.
885 //
886 // @param exDesc the extended message descriptor.
887 //
888 // @param msgDesc the message descriptor.
889 //
890 // @param msgSrc0 the first source operand of send message.
891 //
892 // @param msgSrc1 the second source operand of send message.
893 //
894 // @param msgDst the destination operand of send message.
895 //
896 // Returns a simd vector of type Ty1 and size N1.
897 //
898 template <typename Ty1, int N1, typename Ty2, int N2, typename Ty3, int N3,
899  int N = 16>
900 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty1, N1> __esimd_raw_sends2(
901  uint8_t modifier, uint8_t execSize,
902  __ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0, uint8_t numSrc1,
903  uint8_t numDst, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc,
904  __ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc0,
905  __ESIMD_DNS::vector_type_t<Ty3, N3> msgSrc1,
906  __ESIMD_DNS::vector_type_t<Ty1, N1> msgDst) __ESIMD_INTRIN_END;
907 
908 // \brief Raw send.
909 //
910 // @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
911 //
912 // @param execSize the execution size, which must be a compile time constant.
913 //
914 // @param pred the predicate to specify enabled channels.
915 //
916 // @param numSrc0 the number of GRFs for source-0, which must be a compile time
917 // constant.
918 //
919 // @param numDst the number of GRFs for destination, which must be a compile
920 // time constant.
921 //
922 // @param sfid the shared function ID, which must be a compile time constant.
923 //
924 // @param exDesc the extended message descriptor.
925 //
926 // @param msgDesc the message descriptor.
927 //
928 // @param msgSrc0 the first source operand of send message.
929 //
930 // @param msgDst the destination operand of send message.
931 //
932 // Returns a simd vector of type Ty1 and size N1.
933 //
934 template <typename Ty1, int N1, typename Ty2, int N2, int N = 16>
935 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty1, N1> __esimd_raw_send2(
936  uint8_t modifier, uint8_t execSize,
937  __ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0, uint8_t numDst,
938  uint8_t sfid, uint32_t exDesc, uint32_t msgDesc,
939  __ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc0,
940  __ESIMD_DNS::vector_type_t<Ty1, N1> msgDst) __ESIMD_INTRIN_END;
941 
942 // \brief Raw sends.
943 //
944 // @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
945 //
946 // @param execSize the execution size, which must be a compile time constant.
947 //
948 // @param pred the predicate to specify enabled channels.
949 //
950 // @param numSrc0 the number of GRFs for source-0, which must be a compile time
951 // constant.
952 //
953 // @param numSrc1 the number of GRFs for source-1, which must be a compile time
954 // constant.
955 //
956 // @param sfid the shared function ID, which must be a compile time constant.
957 //
958 // @param exDesc the extended message descriptor.
959 //
960 // @param msgDesc the message descriptor.
961 //
962 // @param msgSrc0 the first source operand of send message.
963 //
964 // @param msgSrc1 the second source operand of send message.
965 //
966 template <typename Ty1, int N1, typename Ty2, int N2, int N = 16>
967 __ESIMD_INTRIN void __esimd_raw_sends2_noresult(
968  uint8_t modifier, uint8_t execSize,
969  __ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0, uint8_t numSrc1,
970  uint8_t sfid, uint32_t exDesc, uint32_t msgDesc,
971  __ESIMD_DNS::vector_type_t<Ty1, N1> msgSrc0,
972  __ESIMD_DNS::vector_type_t<Ty2, N2> msgSrc1) __ESIMD_INTRIN_END;
973 
974 // \brief Raw send.
975 //
976 // @param modifier the send message flags (Bit-0: isSendc, Bit-1: isEOT).
977 //
978 // @param execSize the execution size, which must be a compile time constant.
979 //
980 // @param pred the predicate to specify enabled channels.
981 //
982 // @param numSrc0 the number of GRFs for source-0, which must be a compile time
983 // constant.
984 //
985 // @param sfid the shared function ID, which must be a compile time constant.
986 //
987 // @param exDesc the extended message descriptor.
988 //
989 // @param msgDesc the message descriptor.
990 //
991 // @param msgSrc0 the first source operand of send message.
992 //
993 template <typename Ty1, int N1, int N = 16>
994 __ESIMD_INTRIN void __esimd_raw_send2_noresult(
995  uint8_t modifier, uint8_t execSize,
996  __ESIMD_DNS::simd_mask_storage_t<N> pred, uint8_t numSrc0, uint8_t sfid,
997  uint32_t exDesc, uint32_t msgDesc,
998  __ESIMD_DNS::vector_type_t<Ty1, N1> msgSrc0) __ESIMD_INTRIN_END;
999 
The file contains implementations of accessor class.
rgba_channel_mask
Represents a pixel's channel mask - all possible combinations of enabled channels.
Definition: common.hpp:121
unsigned int SurfaceIndex
Surface index type.
Definition: common.hpp:63
constexpr int get_num_channels_enabled(rgba_channel_mask M)
Definition: common.hpp:144
atomic_op
Represents an atomic operation.
Definition: common.hpp:159
__ESIMD_API SZ simd< T, SZ > src1
Definition: math.hpp:179
__ESIMD_API SZ src0
Definition: math.hpp:179
__ESIMD_API SurfaceIndex get_surface_index(AccessorTy acc)
Get surface index corresponding to a SYCL accessor.
Definition: memory.hpp:53
lsc_data_size
Data size or format to read or store.
Definition: common.hpp:449
cache_hint
L1, L2 or L3 cache hints.
Definition: common.hpp:348
constexpr if(sizeof(T)==8)
return(x >> one)+(y >> one)+((y &x) &one)
Definition: access.hpp:18