DPC++ Runtime
Runtime libraries for oneAPI DPC++
memory_intrin.hpp
Go to the documentation of this file.
1 //==------------ memory_intrin.hpp - DPC++ Explicit SIMD API ---------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // Declares Explicit SIMD intrinsics used to implement working with
9 // the SIMD classes objects.
10 //===----------------------------------------------------------------------===//
11 
13 
14 #pragma once
15 
17 #include <CL/sycl/types.hpp>
21 
22 #include <cstdint>
23 
24 #ifndef __SYCL_DEVICE_ONLY__
25 // ESIMD_CPU Emulation support using esimd_cpu plugin
26 
28 #include <CL/sycl/detail/pi.hpp>
31 
32 // Channel Mask Array for scaled-gather/scatter
33 const std::array<__ESIMD_NS::rgba_channel, 4> ChannelMaskArray{
34  __ESIMD_NS::rgba_channel::R, __ESIMD_NS::rgba_channel::G,
35  __ESIMD_NS::rgba_channel::B, __ESIMD_NS::rgba_channel::A};
36 
37 #endif // ifndef __SYCL_DEVICE_ONLY__
38 
40 namespace __ESIMD_DNS {
41 
// Provides access to sycl accessor class' private members.
class AccessorPrivateProxy {
public:
  /// Returns the surface handle backing the accessor.
  /// Device compilation: forwards to the accessor's private
  /// getNativeImageObj(). Host (emulation): there is no native image
  /// object, so the accessor itself is returned and used as the handle.
  template <typename AccessorTy>
  static auto getNativeImageObj(const AccessorTy &Acc) {
#ifdef __SYCL_DEVICE_ONLY__
    return Acc.getNativeImageObj();
#else  // __SYCL_DEVICE_ONLY__
    return Acc;
#endif // __SYCL_DEVICE_ONLY__
  }
#ifndef __SYCL_DEVICE_ONLY__
  /// Host-only: returns the raw data pointer held by a host accessor.
  static void *getPtr(const sycl::detail::AccessorBaseHost &Acc) {
    return Acc.getPtr();
  }
#endif // __SYCL_DEVICE_ONLY__
};
59 
/// Encodes the per-address element count for hardware messages.
/// The encoding is log2 of ElemsPerAddr; the enable_if constraint limits
/// the argument to {1, 2, 4}, so the result is always 0, 1 or 2.
template <int ElemsPerAddr,
          typename = std::enable_if_t<(ElemsPerAddr == 1 || ElemsPerAddr == 2 ||
                                       ElemsPerAddr == 4)>>
constexpr unsigned int ElemsPerAddrEncoding() {
  // log2 over the allowed set {1, 2, 4}; no other values can instantiate.
  return (ElemsPerAddr == 1) ? 0u : (ElemsPerAddr == 2) ? 1u : 2u;
}
74 
/// Inverse of ElemsPerAddrEncoding(): decodes a log2 encoding back to the
/// element count, i.e. returns 2^ElemsPerAddrEncoded.
constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) {
  return 1u << ElemsPerAddrEncoded;
}
79 
80 } // namespace __ESIMD_DNS
81 } // __SYCL_INLINE_NAMESPACE(cl)
82 
// flat_read does flat-address gather.
//
// Gathers up to ElemsPerAddrDecoding(NumBlk) elements of type Ty from each
// of the N flat addresses in 'addrs'. Lanes whose predicate bit is zero are
// skipped and keep the initial value 0 in the result.
template <typename Ty, int N, int NumBlk = 0, int ElemsPerAddr = 0>
__ESIMD_INTRIN
    __ESIMD_DNS::vector_type_t<Ty,
                               N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
    __esimd_svm_gather(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
                       __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  auto NumBlkDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk);
  __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
      V = 0;
  auto ElemsPerAddrDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(ElemsPerAddr);
  // For 2-byte elements the decoded count is halved — presumably because the
  // encoding is expressed in dword-sized units; TODO confirm against the
  // hardware gather message description.
  if (sizeof(Ty) == 2)
    ElemsPerAddrDecoded = ElemsPerAddrDecoded / 2;

  for (int I = 0; I < N; I++) {
    if (pred[I]) {
      Ty *Addr = reinterpret_cast<Ty *>(addrs[I]);
      if (sizeof(Ty) <= 2) {
        // 1- and 2-byte elements: result laid out address-major, i.e. the
        // elements read from addrs[I] occupy consecutive result slots.
        for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
          V[I * NumBlkDecoded + J] = *(Addr + J);
      } else {
        // 4-/8-byte elements: result laid out block-major (transposed) —
        // slot J*N+I holds element J read from addrs[I].
        for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
          V[J * N + I] = *(Addr + J);
      }
    }
  }
  return V;
}
#endif // __SYCL_DEVICE_ONLY__
116 
// flat_write does flat-address scatter.
//
// Scatters elements from 'vals' to the N flat addresses in 'addrs'; the
// exact mirror of __esimd_svm_gather, including the element layout of
// 'vals'. Lanes whose predicate bit is zero are not written.
template <typename Ty, int N, int NumBlk = 0, int ElemsPerAddr = 0>
__ESIMD_INTRIN void __esimd_svm_scatter(
    __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
    __ESIMD_DNS::vector_type_t<Ty,
                               N * __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk)>
        vals,
    __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  auto NumBlkDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(NumBlk);
  auto ElemsPerAddrDecoded = __ESIMD_DNS::ElemsPerAddrDecoding(ElemsPerAddr);
  // Same 2-byte adjustment as in __esimd_svm_gather — TODO confirm the
  // encoding units against the hardware scatter message description.
  if (sizeof(Ty) == 2)
    ElemsPerAddrDecoded = ElemsPerAddrDecoded / 2;

  for (int I = 0; I < N; I++) {
    if (pred[I]) {
      Ty *Addr = reinterpret_cast<Ty *>(addrs[I]);
      if (sizeof(Ty) <= 2) {
        // 1-/2-byte elements: 'vals' is address-major.
        for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
          *(Addr + J) = vals[I * NumBlkDecoded + J];
      } else {
        // 4-/8-byte elements: 'vals' is block-major (transposed).
        for (int J = 0; J < NumBlkDecoded && J < ElemsPerAddrDecoded; J++)
          *(Addr + J) = vals[J * N + I];
      }
    }
  }
}
#endif // __SYCL_DEVICE_ONLY__
148 
149 // flat_block_read reads a block of data from one flat address
150 template <typename Ty, int N>
151 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
152 __esimd_svm_block_ld_unaligned(uint64_t addr)
153 #ifdef __SYCL_DEVICE_ONLY__
154  ;
155 #else
156 {
157  __ESIMD_DNS::vector_type_t<Ty, N> V;
158 
159  for (int I = 0; I < N; I++) {
160  Ty *Addr = reinterpret_cast<Ty *>(addr + I * sizeof(Ty));
161  V[I] = *Addr;
162  }
163  return V;
164 }
165 #endif // __SYCL_DEVICE_ONLY__
166 
167 // Read a block of data from the given address. Address must be 16-byte aligned.
168 template <typename Ty, int N>
169 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
170 __esimd_svm_block_ld(uint64_t addr)
171 #ifdef __SYCL_DEVICE_ONLY__
172  ;
173 #else
174 {
175  __ESIMD_DNS::vector_type_t<Ty, N> V;
176 
177  for (int I = 0; I < N; I++) {
178  Ty *Addr = reinterpret_cast<Ty *>(addr + I * sizeof(Ty));
179  V[I] = *Addr;
180  }
181  return V;
182 }
183 #endif // __SYCL_DEVICE_ONLY__
184 
185 // flat_block_write writes a block of data using one flat address
186 template <typename Ty, int N>
187 __ESIMD_INTRIN void __esimd_svm_block_st(uint64_t addr,
188  __ESIMD_DNS::vector_type_t<Ty, N> vals)
189 #ifdef __SYCL_DEVICE_ONLY__
190  ;
191 #else
192 {
193  for (int I = 0; I < N; I++) {
194  Ty *Addr = reinterpret_cast<Ty *>(addr + I * sizeof(Ty));
195  *Addr = vals[I];
196  }
197 }
198 #endif // __SYCL_DEVICE_ONLY__
199 
200 // Reads a block of data from given surface at given offset.
201 template <typename Ty, int N, typename SurfIndAliasTy, int32_t IsModified = 0>
202 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
203 __esimd_oword_ld_unaligned(SurfIndAliasTy surf_ind, uint32_t offset)
204 #ifdef __SYCL_DEVICE_ONLY__
205  ;
206 #else
207 {
208  __ESIMD_DNS::vector_type_t<Ty, N> retv;
209  sycl::detail::ESIMDDeviceInterface *I =
211 
212  if (surf_ind == __ESIMD_NS::detail::SLM_BTI) {
213  // O-word/Block load for Shared Local Memory
214  // __ESIMD_NS::detail::SLM_BTI is special binding table index for SLM
215  char *SlmBase = I->__cm_emu_get_slm_ptr();
216  for (int i = 0; i < N; ++i) {
217  Ty *SlmAddr = reinterpret_cast<Ty *>(offset + SlmBase);
218  retv[i] = *SlmAddr;
219  offset += sizeof(Ty);
220  }
221  } else {
222  // O-word/Block load for regular surface indexed by surf_ind
223  char *readBase;
224  uint32_t width;
225  std::mutex *mutexLock;
226 
227  I->sycl_get_cm_buffer_params_ptr(surf_ind, &readBase, &width, &mutexLock);
228 
229  std::lock_guard<std::mutex> lock(*mutexLock);
230 
231  for (int idx = 0; idx < N; idx++) {
232  if (offset >= width) {
233  retv[idx] = 0;
234  } else {
235  retv[idx] = *((Ty *)(readBase + offset));
236  }
237  offset += (uint32_t)sizeof(Ty);
238  }
239  }
240  return retv;
241 }
242 #endif // __SYCL_DEVICE_ONLY__
243 
244 // Writes given block of data to a surface with given index at given offset.
245 template <typename Ty, int N, typename SurfIndAliasTy>
246 __ESIMD_INTRIN void __esimd_oword_st(SurfIndAliasTy surf_ind, uint32_t offset,
247  __ESIMD_DNS::vector_type_t<Ty, N> vals)
248 #ifdef __SYCL_DEVICE_ONLY__
249  ;
250 #else
251 {
252  offset <<= 4;
253 
254  sycl::detail::ESIMDDeviceInterface *I =
256  if (surf_ind == __ESIMD_NS::detail::SLM_BTI) {
257  // O-word/Block store for Shared Local Memory
258  // __ESIMD_NS::detail::SLM_BTI is special binding table index for SLM
259  char *SlmBase = I->__cm_emu_get_slm_ptr();
260  for (int i = 0; i < N; ++i) {
261  Ty *SlmAddr = reinterpret_cast<Ty *>(offset + SlmBase);
262  *SlmAddr = vals[i];
263  offset += sizeof(Ty);
264  }
265  } else {
266  // O-word/Block store for regular surface indexed by surf_ind
267  char *writeBase;
268  uint32_t width;
269  std::mutex *mutexLock;
270 
271  I->sycl_get_cm_buffer_params_ptr(surf_ind, &writeBase, &width, &mutexLock);
272 
273  std::lock_guard<std::mutex> lock(*mutexLock);
274 
275  for (int idx = 0; idx < N; idx++) {
276  if (offset < width) {
277  *((Ty *)(writeBase + offset)) = vals[idx];
278  } else {
279  break;
280  }
281  offset += (uint32_t)sizeof(Ty);
282  }
283 
284  // TODO : Optimize
285  I->cm_fence_ptr();
286  }
287 }
288 #endif // __SYCL_DEVICE_ONLY__
289 
// flat_read4 does flat-address gather4.
//
// Gathers one element per enabled RGBA channel from each of the N flat
// addresses; channel C of address I is read from addrs[I] + C*sizeof(Ty).
// The result is channel-major: all R values, then all G values, etc., for
// the enabled channels only. Predicated-off lanes keep the initial 0.
template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask>
__ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)>
    __ESIMD_INTRIN
    __esimd_svm_gather4_scaled(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
                               __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> V = 0;
  unsigned int Next = 0;
  uint64_t Offset = 0;

  for (const auto &channel : ChannelMaskArray) {
    if (__ESIMD_NS::is_channel_enabled(Mask, channel)) {
      for (int I = 0; I < N; I++, Next++) {
        if (pred[I]) {
          Ty *Addr = reinterpret_cast<Ty *>(addrs[I] + Offset);
          V[Next] = *Addr;
        }
      }
    }
    // Offset advances for every channel (enabled or not) — channel slots
    // occupy fixed positions in memory.
    Offset += (uint64_t)sizeof(Ty);
  }

  return V;
}
#endif // __SYCL_DEVICE_ONLY__
319 
320 // flat_write does flat-address scatter
321 template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask>
322 __ESIMD_INTRIN void __esimd_svm_scatter4_scaled(
323  __ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
324  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> vals,
325  __ESIMD_DNS::simd_mask_storage_t<N> pred = 1)
326 #ifdef __SYCL_DEVICE_ONLY__
327  ;
328 #else
329 {
330  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> V;
331  unsigned int Next = 0;
332  uint64_t Offset = 0;
333 
334  for (const auto &channel : ChannelMaskArray) {
335  if (__ESIMD_NS::is_channel_enabled(Mask, channel)) {
336  for (int I = 0; I < N; I++, Next++) {
337  if (pred[I]) {
338  Ty *Addr = reinterpret_cast<Ty *>(addrs[I] + Offset);
339  *Addr = vals[Next];
340  }
341  }
342  }
343  Offset += (uint64_t)sizeof(Ty);
344  }
345 }
346 #endif // __SYCL_DEVICE_ONLY__
347 
// Low-level surface-based gather. Collects elements located at given offsets
// in a surface and returns them as a single \ref simd object. Element can be
// 1, 2 or 4-byte value, but is always returned as a 4-byte value within the
// resulting simd object, with upper 2 or 3 bytes undefined.
// Template (compile-time constant) parameters:
// @tparam Ty - element type; can only be a 4-byte integer or \c float,
// @tparam N - the number of elements
// @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
// accessor used to denote the surface
// @tparam TySizeLog2 - Log2 of the number of bytes read per element:
// 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
// @tparam Scale - offset scaling factor; must be zero currently
// @tparam L1H - L1 cache hint
// @tparam L3H - L3 cache hint
//
// Formal parameters:
// @param surf_ind - the surface index, taken from the SYCL memory object
// @param global_offset - offset added to each individual element's offset to
// compute actual memory access offset for that element
// @param elem_offsets - per-element offsets
//
template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
          int16_t Scale = 0>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
__esimd_gather_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
                       __ESIMD_DNS::vector_type_t<uint32_t, N> elem_offsets)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  // Host emulation is not implemented for this intrinsic; only the
  // compile-time contract is enforced before aborting.
  static_assert(N == 1 || N == 8 || N == 16 || N == 32);
  static_assert(TySizeLog2 <= 2 && Scale == 0);
  static_assert(std::is_integral<Ty>::value || TySizeLog2 == 2);
  __ESIMD_UNSUPPORTED_ON_HOST;
}
#endif // __SYCL_DEVICE_ONLY__
384 
385 // Low-level surface-based scatter. Writes elements of a \ref simd object into a
386 // surface at given offsets. Element can be a 1, 2 or 4-byte value, but it is
387 // always represented as a 4-byte value within the input simd object,
388 // unused (not written) upper bytes are ignored.
389 // Template (compile-time constant) parameters:
390 // @tparam Ty - element type; can only be a 4-byte integer or \c float,
391 // @tparam N - the number of elements to write
392 // @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
393 // accessor used to denote the surface
394 // @tparam TySizeLog2 - Log2 of the number of bytes written per element:
395 // 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
396 // @tparam Scale - offset scale; only 0 is supported for now
397 // @tparam L1H - L1 cache hint
398 // @tparam L3H - L3 cache hint
399 //
400 // Formal parameters:
401 // @param pred - per-element predicates; elements with zero corresponding
402 // predicates are not written
403 // @param surf_ind - the surface index, taken from the SYCL memory object
404 // @param global_offset - offset added to each individual element's offset to
405 // compute actual memory access offset for that element
406 // @param elem_offsets - per-element offsets
407 // @param vals - values to write
408 //
409 template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
410  int16_t Scale = 0>
411 __ESIMD_INTRIN void
412 __esimd_scatter_scaled(__ESIMD_DNS::simd_mask_storage_t<N> pred,
413  SurfIndAliasTy surf_ind, uint32_t global_offset,
414  __ESIMD_DNS::vector_type_t<uint32_t, N> elem_offsets,
415  __ESIMD_DNS::vector_type_t<Ty, N> vals)
416 #ifdef __SYCL_DEVICE_ONLY__
417  ;
418 #else
419 {
420  static_assert(N == 1 || N == 8 || N == 16 || N == 32);
421  static_assert(TySizeLog2 <= 2);
422  static_assert(std::is_integral<Ty>::value || TySizeLog2 == 2);
423 
424  // determine the original element's type size (as __esimd_scatter_scaled
425  // requires vals to be a vector of 4-byte integers)
426  constexpr size_t OrigSize = __ESIMD_DNS::ElemsPerAddrDecoding(TySizeLog2);
427  using RestoredTy = __ESIMD_DNS::uint_type_t<OrigSize>;
428 
429  sycl::detail::ESIMDDeviceInterface *I =
431 
432  __ESIMD_DNS::vector_type_t<RestoredTy, N> TypeAdjustedVals;
433  if constexpr (OrigSize == 4) {
434  TypeAdjustedVals = __ESIMD_DNS::bitcast<RestoredTy, Ty, N>(vals);
435  } else {
436  static_assert(OrigSize == 1 || OrigSize == 2);
437  TypeAdjustedVals = __ESIMD_DNS::convert_vector<RestoredTy, Ty, N>(vals);
438  }
439 
440  if (surf_ind == __ESIMD_NS::detail::SLM_BTI) {
441  // Scattered-store for Shared Local Memory
442  // __ESIMD_NS::detail::SLM_BTI is special binding table index for SLM
443  assert(global_offset == 0);
444  char *SlmBase = I->__cm_emu_get_slm_ptr();
445  for (int i = 0; i < N; ++i) {
446  if (pred[i]) {
447  RestoredTy *addr =
448  reinterpret_cast<RestoredTy *>(elem_offsets[i] + SlmBase);
449  *addr = TypeAdjustedVals[i];
450  }
451  }
452  } else {
453  // Scattered-store for regular surface indexed by surf_ind
454  char *writeBase;
455  uint32_t width;
456  std::mutex *mutexLock;
457 
458  I->sycl_get_cm_buffer_params_ptr(surf_ind, &writeBase, &width, &mutexLock);
459  writeBase += global_offset;
460 
461  std::lock_guard<std::mutex> lock(*mutexLock);
462 
463  for (int idx = 0; idx < N; idx++) {
464  if (pred[idx]) {
465  RestoredTy *addr =
466  reinterpret_cast<RestoredTy *>(elem_offsets[idx] + writeBase);
467  *addr = TypeAdjustedVals[idx];
468  }
469  }
470 
471  // TODO : Optimize
472  I->cm_fence_ptr();
473  }
474 }
475 #endif // __SYCL_DEVICE_ONLY__
476 
// flat_atomic: flat-address atomic with zero source operands
// (e.g. increment/decrement family — determined by Op).
// Host emulation is not implemented; calling this on host aborts.
template <__ESIMD_NS::atomic_op Op, typename Ty, int N>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
__esimd_svm_atomic0(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
                    __ESIMD_DNS::simd_mask_storage_t<N> pred)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  __ESIMD_UNSUPPORTED_ON_HOST;
}
#endif // __SYCL_DEVICE_ONLY__
489 
490 template <__ESIMD_NS::atomic_op Op, typename Ty, int N>
491 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
492 __esimd_svm_atomic1(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
493  __ESIMD_DNS::vector_type_t<Ty, N> src0,
494  __ESIMD_DNS::simd_mask_storage_t<N> pred)
495 #ifdef __SYCL_DEVICE_ONLY__
496  ;
497 #else
498 {
499  __ESIMD_DNS::vector_type_t<Ty, N> retv;
500 
501  for (int i = 0; i < N; i++) {
502  if (pred[i]) {
503  Ty *p = reinterpret_cast<Ty *>(addrs[i]);
504 
505  switch (Op) {
507  retv[i] = atomic_add_fetch<Ty>(p, src0[i]);
508  break;
509  default:
510  __ESIMD_UNSUPPORTED_ON_HOST;
511  }
512  }
513  }
514 
515  return retv;
516 }
517 #endif // __SYCL_DEVICE_ONLY__
518 
// flat_atomic: flat-address atomic with two source operands
// (e.g. compare-and-swap family — determined by Op).
// Host emulation is not implemented; calling this on host aborts.
template <__ESIMD_NS::atomic_op Op, typename Ty, int N>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
__esimd_svm_atomic2(__ESIMD_DNS::vector_type_t<uint64_t, N> addrs,
                    __ESIMD_DNS::vector_type_t<Ty, N> src0,
                    __ESIMD_DNS::vector_type_t<Ty, N> src1,
                    __ESIMD_DNS::simd_mask_storage_t<N> pred)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  __ESIMD_UNSUPPORTED_ON_HOST;
}
#endif // __SYCL_DEVICE_ONLY__
532 
// Requests 'size' bytes of SLM (Shared Local Memory) for the work-group.
// Host emulation forwards the request to the CM emulation layer.
__ESIMD_INTRIN void __esimd_slm_init(uint32_t size)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  sycl::detail::getESIMDDeviceInterface()->cm_slm_init_ptr(size);
}
#endif // ifndef __SYCL_DEVICE_ONLY__
541 
// esimd_barrier, generic group barrier.
// Host emulation forwards to the CM emulation layer's barrier.
__ESIMD_INTRIN void __esimd_barrier()
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  sycl::detail::getESIMDDeviceInterface()->cm_barrier_ptr();
}
#endif // __SYCL_DEVICE_ONLY__
551 
// slm_fence sets the SLM read/write order.
// @param cntl fence control bits; the host emulation ignores them and
// issues an unconditional CM fence instead.
__ESIMD_INTRIN void __esimd_fence(uint8_t cntl)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  sycl::detail::getESIMDDeviceInterface()->cm_fence_ptr();
}
#endif // __SYCL_DEVICE_ONLY__
561 
562 // Scaled gather from a surface.
563 template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
564  int16_t Scale = 0>
565 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
566 __esimd_gather_scaled(__ESIMD_DNS::simd_mask_storage_t<N> pred,
567  SurfIndAliasTy surf_ind, uint32_t global_offset,
568  __ESIMD_DNS::vector_type_t<uint32_t, N> addrs)
569 #ifdef __SYCL_DEVICE_ONLY__
570  ;
571 #else
572 {
573  __ESIMD_DNS::vector_type_t<Ty, N> retv = 0;
574  sycl::detail::ESIMDDeviceInterface *I =
576  if (surf_ind == __ESIMD_NS::detail::SLM_BTI) {
577  // Scattered-load for Shared Local Memory
578  // __ESIMD_NS::detail::SLM_BTI is special binding table index for SLM
579  assert(global_offset == 0);
580  char *SlmBase = I->__cm_emu_get_slm_ptr();
581  for (int i = 0; i < N; ++i) {
582  if (pred[i]) {
583  Ty *addr = reinterpret_cast<Ty *>(addrs[i] + SlmBase);
584  retv[i] = *addr;
585  }
586  }
587  } else {
588  // Scattered-load for regular surface indexed by surf_ind
589  char *readBase;
590  uint32_t width;
591  std::mutex *mutexLock;
592 
593  I->sycl_get_cm_buffer_params_ptr(surf_ind, &readBase, &width, &mutexLock);
594  readBase += global_offset;
595 
596  std::lock_guard<std::mutex> lock(*mutexLock);
597 
598  for (int idx = 0; idx < N; idx++) {
599  if (pred[idx]) {
600  Ty *addr = reinterpret_cast<Ty *>(addrs[idx] + readBase);
601  retv[idx] = *addr;
602  }
603  }
604 
605  // TODO : Optimize
606  I->cm_fence_ptr();
607  }
608 
609  return retv;
610 }
611 #endif // __SYCL_DEVICE_ONLY__
612 
613 // Predicated (masked) scaled gather from a surface.
614 //
615 // Template (compile-time constant) parameters:
616 // @tparam Ty - element type
617 // @tparam N - the number of elements to read
618 // @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
619 // accessor used to denote the surface
620 // @tparam TySizeLog2 - Log2 of the number of bytes written per element:
621 // 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
622 // @tparam Scale - offset scale; only 0 is supported for now
623 //
624 // Formal parameters:
625 // @param surf_ind - the surface index, taken from the SYCL memory object
626 // @param global_offset - offset added to each individual element's offset to
627 // compute actual memory access offset for that element
628 // @param offsets - per-element offsets
629 // @param pred - per-element predicates; elements with zero corresponding
630 // predicates are not written
631 // @return - elements read ("gathered") from memory
632 
633 template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
634  int16_t Scale = 0>
635 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
636 __esimd_gather_masked_scaled2(SurfIndAliasTy surf_ind, uint32_t global_offset,
637  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
638  __ESIMD_DNS::simd_mask_storage_t<N> pred)
639 #ifdef __SYCL_DEVICE_ONLY__
640  ;
641 #else
642 {
643  static_assert(Scale == 0);
644 
645  // determine the original element's type size (as __esimd_scatter_scaled
646  // requires vals to be a vector of 4-byte integers)
647  constexpr size_t OrigSize = __ESIMD_DNS::ElemsPerAddrDecoding(TySizeLog2);
648  using RestoredTy = __ESIMD_DNS::uint_type_t<OrigSize>;
649 
650  __ESIMD_DNS::vector_type_t<RestoredTy, N> retv = 0;
651  sycl::detail::ESIMDDeviceInterface *I =
653 
654  if (surf_ind == __ESIMD_NS::detail::SLM_BTI) {
655  // __ESIMD_NS::detail::SLM_BTI is special binding table index for SLM
656  assert(global_offset == 0);
657  char *SlmBase = I->__cm_emu_get_slm_ptr();
658  for (int idx = 0; idx < N; ++idx) {
659  if (pred[idx]) {
660  RestoredTy *addr =
661  reinterpret_cast<RestoredTy *>(offsets[idx] + SlmBase);
662  retv[idx] = *addr;
663  }
664  }
665  } else {
666  char *readBase;
667  uint32_t width;
668  std::mutex *mutexLock;
669 
670  I->sycl_get_cm_buffer_params_ptr(surf_ind, &readBase, &width, &mutexLock);
671 
672  readBase += global_offset;
673  std::lock_guard<std::mutex> lock(*mutexLock);
674  for (int idx = 0; idx < N; idx++) {
675  if (pred[idx]) {
676  RestoredTy *addr =
677  reinterpret_cast<RestoredTy *>(offsets[idx] + readBase);
678  retv[idx] = *addr;
679  }
680  }
681 
682  // TODO : Optimize
683  I->cm_fence_ptr();
684  }
685 
686  if constexpr (OrigSize == 4) {
687  return __ESIMD_DNS::bitcast<Ty, RestoredTy, N>(retv);
688  } else {
689  return __ESIMD_DNS::convert_vector<Ty, RestoredTy, N>(retv);
690  }
691 }
692 #endif // __SYCL_DEVICE_ONLY__
693 
694 // Reads a block of data from given surface at given offset, offset must be
695 // 16-byte-aligned.
696 template <typename Ty, int N, typename SurfIndAliasTy, int32_t IsModified = 0>
697 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
698 __esimd_oword_ld(SurfIndAliasTy surf_ind, uint32_t addr)
699 #ifdef __SYCL_DEVICE_ONLY__
700  ;
701 #else
702 {
703  addr <<= 4;
704 
705  __ESIMD_DNS::vector_type_t<Ty, N> retv;
706  sycl::detail::ESIMDDeviceInterface *I =
708 
709  if (surf_ind == __ESIMD_NS::detail::SLM_BTI) {
710  // O-word/Block load for Shared Local Memory
711  // __ESIMD_NS::detail::SLM_BTI is special binding table index for SLM
712  char *SlmBase = I->__cm_emu_get_slm_ptr();
713  for (int i = 0; i < N; ++i) {
714  Ty *SlmAddr = reinterpret_cast<Ty *>(addr + SlmBase);
715  retv[i] = *SlmAddr;
716  addr += sizeof(Ty);
717  }
718  } else {
719  // O-word/Block load for regular surface indexed by surf_ind
720  char *readBase;
721  uint32_t width;
722  std::mutex *mutexLock;
723 
724  I->sycl_get_cm_buffer_params_ptr(surf_ind, &readBase, &width, &mutexLock);
725 
726  std::lock_guard<std::mutex> lock(*mutexLock);
727 
728  for (int idx = 0; idx < N; idx++) {
729  if (addr >= width) {
730  retv[idx] = 0;
731  } else {
732  retv[idx] = *((Ty *)(readBase + addr));
733  }
734  addr += (uint32_t)sizeof(Ty);
735  }
736  }
737  return retv;
738 }
739 #endif // __SYCL_DEVICE_ONLY__
740 
741 // gather4 scaled masked from a surface/SLM
742 template <typename Ty, int N, __ESIMD_NS::rgba_channel_mask Mask,
743  typename SurfIndAliasTy, int16_t Scale = 0>
744 __ESIMD_INTRIN
745  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)>
746  __esimd_gather4_masked_scaled2(
747  SurfIndAliasTy surf_ind, int global_offset,
748  __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
749  __ESIMD_DNS::simd_mask_storage_t<N> pred)
750 #ifdef __SYCL_DEVICE_ONLY__
751  ;
752 #else
753 {
754  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> retv = 0;
755  sycl::detail::ESIMDDeviceInterface *I =
757  char *ReadBase;
758  unsigned int Next = 0;
759 
760  if (surf_ind == __ESIMD_NS::detail::SLM_BTI) {
761  ReadBase = I->__cm_emu_get_slm_ptr();
762  } else {
763  uint32_t width;
764  std::mutex *mutexLock;
765  I->sycl_get_cm_buffer_params_ptr(surf_ind, &ReadBase, &width, &mutexLock);
766  std::lock_guard<std::mutex> lock(*mutexLock);
767  }
768 
769  ReadBase += global_offset;
770 
771  for (const auto &channel : ChannelMaskArray) {
772  if (__ESIMD_NS::is_channel_enabled(Mask, channel)) {
773  for (int I = 0; I < N; I++, Next++) {
774  if (pred[I]) {
775  Ty *Addr = reinterpret_cast<Ty *>(ReadBase + offsets[I]);
776  retv[Next] = *Addr;
777  }
778  }
779  }
780  ReadBase += (uint64_t)sizeof(Ty);
781  }
782 
783  return retv;
784 }
785 #endif // __SYCL_DEVICE_ONLY__
786 
787 // scatter4 scaled to a surface/SLM
788 template <typename Ty, int N, typename SurfIndAliasTy,
789  __ESIMD_NS::rgba_channel_mask Mask, int16_t Scale = 0>
790 __ESIMD_INTRIN void __esimd_scatter4_scaled(
791  __ESIMD_DNS::simd_mask_storage_t<N> pred, SurfIndAliasTy surf_ind,
792  int global_offset, __ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
793  __ESIMD_DNS::vector_type_t<Ty, N * get_num_channels_enabled(Mask)> vals)
794 #ifdef __SYCL_DEVICE_ONLY__
795  ;
796 #else
797 {
798  sycl::detail::ESIMDDeviceInterface *I =
800  char *WriteBase;
801  unsigned int Next = 0;
802 
803  if (surf_ind == __ESIMD_NS::detail::SLM_BTI) {
804  WriteBase = I->__cm_emu_get_slm_ptr();
805  } else {
806  uint32_t width;
807  std::mutex *mutexLock;
808  I->sycl_get_cm_buffer_params_ptr(surf_ind, &WriteBase, &width, &mutexLock);
809  std::lock_guard<std::mutex> lock(*mutexLock);
810  }
811 
812  WriteBase += global_offset;
813 
814  for (const auto &channel : ChannelMaskArray) {
815  if (__ESIMD_NS::is_channel_enabled(Mask, channel)) {
816  for (int I = 0; I < N; I++, Next++) {
817  if (pred[I]) {
818  Ty *Addr = reinterpret_cast<Ty *>(WriteBase + offsets[I]);
819  *Addr = vals[Next];
820  }
821  }
822  }
823  WriteBase += (uint64_t)sizeof(Ty);
824  }
825 }
826 #endif // __SYCL_DEVICE_ONLY__
827 
// Surface-based atomic operations.
//
// DWORD atomic with zero source operands. Host emulation supports only SLM
// surfaces and Op == atomic_op::inc; anything else aborts via
// __ESIMD_UNSUPPORTED_ON_HOST.
template <__ESIMD_NS::atomic_op Op, typename Ty, int N, typename SurfIndAliasTy>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
__esimd_dword_atomic0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
                      SurfIndAliasTy surf_ind,
                      __ESIMD_DNS::vector_type_t<uint32_t, N> addrs)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  // NOTE(review): lanes with pred[i] == 0 leave retv[i] uninitialized.
  __ESIMD_DNS::vector_type_t<Ty, N> retv;

  if (surf_ind == __ESIMD_NS::detail::SLM_BTI) {
    char *WriteBase =
        sycl::detail::getESIMDDeviceInterface()->__cm_emu_get_slm_ptr();

    for (int i = 0; i < N; i++) {
      if (pred[i]) {
        Ty *p = reinterpret_cast<Ty *>(addrs[i] + WriteBase);

        switch (Op) {
        case __ESIMD_NS::atomic_op::inc:
          // Implemented via add-fetch of 1 — presumably returns the new
          // (incremented) value; TODO confirm against device semantics.
          retv[i] = atomic_add_fetch<Ty>(p, 1);
          break;
        default:
          __ESIMD_UNSUPPORTED_ON_HOST;
        }
      }
    }
  } else {
    __ESIMD_UNSUPPORTED_ON_HOST;
  }
  return retv;
}
#endif // __SYCL_DEVICE_ONLY__
863 
// DWORD atomic with one source operand (e.g. add/sub family — determined
// by Op). Host emulation is not implemented; calling this on host aborts.
template <__ESIMD_NS::atomic_op Op, typename Ty, int N, typename SurfIndAliasTy>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
__esimd_dword_atomic1(__ESIMD_DNS::simd_mask_storage_t<N> pred,
                      SurfIndAliasTy surf_ind,
                      __ESIMD_DNS::vector_type_t<uint32_t, N> addrs,
                      __ESIMD_DNS::vector_type_t<Ty, N> src0)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  __ESIMD_UNSUPPORTED_ON_HOST;
}
#endif // __SYCL_DEVICE_ONLY__
877 
// DWORD atomic with two source operands (e.g. compare-and-swap family —
// determined by Op). Host emulation is not implemented; calling this on
// host aborts.
template <__ESIMD_NS::atomic_op Op, typename Ty, int N, typename SurfIndAliasTy>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N>
__esimd_dword_atomic2(__ESIMD_DNS::simd_mask_storage_t<N> pred,
                      SurfIndAliasTy surf_ind,
                      __ESIMD_DNS::vector_type_t<uint32_t, N> addrs,
                      __ESIMD_DNS::vector_type_t<Ty, N> src0,
                      __ESIMD_DNS::vector_type_t<Ty, N> src1)
#ifdef __SYCL_DEVICE_ONLY__
    ;
#else
{
  __ESIMD_UNSUPPORTED_ON_HOST;
}
#endif // __SYCL_DEVICE_ONLY__
892 
893 // Media block load.
894 //
895 // @tparam Ty the element data type.
896 // @tparam M the hight of the 2D block.
897 // @tparam N the width of the 2D block.
898 // @tparam Modifier top/bottom field surface access control.
899 // @tparam TACC type of the surface handle.
900 // @tparam Plane planar surface index.
901 // @tparam BlockWidth the width of the return block.
902 // @param handle the surface handle.
903 // @param x X-coordinate of the left upper rectangle corner in BYTES.
904 // @param y Y-coordinate of the left upper rectangle corner in ROWS.
905 //
906 // @return the linearized 2D block data read from surface.
907 //
908 template <typename Ty, int M, int N, int Modifier, typename TACC, int Plane,
909  int BlockWidth>
910 __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, M * N>
911 __esimd_media_ld(TACC handle, unsigned x, unsigned y)
912 #ifdef __SYCL_DEVICE_ONLY__
913  ;
914 #else
915 {
916  __ESIMD_DNS::vector_type_t<Ty, M * N> vals;
917  char *readBase;
918  uint32_t bpp;
919  uint32_t imgWidth;
920  uint32_t imgHeight;
921  std::mutex *mutexLock;
922 
923  assert((handle != __ESIMD_NS::detail::SLM_BTI) &&
924  "__esimd_media_ld cannot access SLM");
925 
926  sycl::detail::getESIMDDeviceInterface()->sycl_get_cm_image_params_ptr(
927  handle, &readBase, &imgWidth, &imgHeight, &bpp, &mutexLock);
928 
929  std::lock_guard<std::mutex> lock(*mutexLock);
930 
931  int x_pos_a, y_pos_a, offset, index;
932 
933  // TODO : Remove intermediate 'in' matrix
934  std::vector<std::vector<Ty>> in(M, std::vector<Ty>(N));
935  int R = M;
936  int C = N;
937  for (int i = 0; i < R; i++) {
938  for (int j = 0; j < C; j++) {
939  x_pos_a = x + j * sizeof(Ty);
940  { y_pos_a = y + i; }
941  // We should check the boundary condition based on sizeof(Ty), x_pos_a is
942  // 0-based Note: Use a signed variable; otherwise sizeof(Ty) is unsigned
943  if ((x_pos_a + sizeof(Ty)) > imgWidth) {
944  // If we're trying to read outside the boundary, limit the value of
945  // x_pos_a Assumption -- We don't this situation:
946  // x_pos_a width's boundary
947  // | |
948  // <---type(Ty)--->
949  // At most x_pos_a+sizeof(Ty) is exactly at the boundary.
950  x_pos_a = imgWidth;
951  }
952  if (y_pos_a > imgHeight - 1) {
953  y_pos_a = imgHeight - 1;
954  }
955  if (y_pos_a < 0) {
956  y_pos_a = 0;
957  }
958  {
959  if (x_pos_a < 0) {
960  // Need to align x position to bbp
961  int offset = x % bpp;
962  x_pos_a -= offset;
963  }
964  while (x_pos_a < 0) {
965  // If we're trying to read outside the left boundary, increase x_pos_a
966  x_pos_a += bpp;
967  }
968  }
969 
970  if (x_pos_a >= imgWidth) {
971  {
972  x_pos_a = x_pos_a - bpp;
973  for (uint byte_count = 0; byte_count < sizeof(Ty); byte_count++) {
974  if (x_pos_a >= imgWidth) {
975  x_pos_a = x_pos_a - bpp;
976  }
977  offset = y_pos_a * imgWidth + x_pos_a;
978 
979  /*
980  If destination size per element is less then or equal pixel size
981  of the surface move the pixel value accross the destination
982  elements. If destination size per element is greater then pixel
983  size of the surface replicate pixel value in the destination
984  element.
985  */
986  if (sizeof(Ty) <= bpp) {
987  for (uint bpp_count = 0; j < C && bpp_count < bpp;
988  j++, bpp_count += sizeof(Ty)) {
989  in[i][j] = *((Ty *)(readBase + offset + bpp_count));
990  }
991  j--;
992  break;
993  } else {
994  // ((unsigned char*)in.get_addr(i*C+j))[byte_count] = *((unsigned
995  // char*)((char*)buff_iter->p + offset));
996  unsigned char *pTempBase =
997  ((unsigned char *)in[i].data()) + j * sizeof(Ty);
998  pTempBase[byte_count] = *((unsigned char *)(readBase + offset));
999  }
1000 
1001  x_pos_a = x_pos_a + 1;
1002  }
1003  x_pos_a = imgWidth;
1004  }
1005  } else {
1006  offset = y_pos_a * imgWidth + x_pos_a;
1007  { in[i][j] = *((Ty *)(readBase + offset)); }
1008  }
1009  }
1010  }
1011 
1012  for (auto i = 0, k = 0; i < M; i++) {
1013  for (auto j = 0; j < N; j++) {
1014  vals[k++] = in[i][j];
1015  }
1016  }
1017 
1018  return vals;
1019 }
1020 #endif // __SYCL_DEVICE_ONLY__
1021 
1022 // Media block store
1023 //
1024 // @tparam Ty the element data type.
1025 // @tparam M the hight of the 2D block.
1026 // @tparam N the width of the 2D block.
1027 // @tparam Modifier top/bottom field surface access control.
1028 // @tparam TACC type of the surface handle.
1029 // @tparam Plane planar surface index.
1030 // @tparam BlockWidth the width of the return block.
1031 // @param handle the surface handle.
1032 // @param x X-coordinate of the left upper rectangle corner in BYTES.
1033 // @param y Y-coordinate of the left upper rectangle corner in ROWS.
1034 // @param vals the linearized 2D block data to be written to surface.
1035 //
1036 template <typename Ty, int M, int N, int Modifier, typename TACC, int Plane,
1037  int BlockWidth>
1038 __ESIMD_INTRIN void __esimd_media_st(TACC handle, unsigned x, unsigned y,
1039  __ESIMD_DNS::vector_type_t<Ty, M * N> vals)
1040 #ifdef __SYCL_DEVICE_ONLY__
1041  ;
1042 #else
1043 {
1044  sycl::detail::ESIMDDeviceInterface *I =
1046 
1047  char *writeBase;
1048  uint32_t bpp;
1049  uint32_t imgWidth;
1050  uint32_t imgHeight;
1051  std::mutex *mutexLock;
1052 
1053  assert((handle != __ESIMD_NS::detail::SLM_BTI) &&
1054  "__esimd_media_ld cannot access SLM");
1055 
1056  I->sycl_get_cm_image_params_ptr(handle, &writeBase, &imgWidth, &imgHeight,
1057  &bpp, &mutexLock);
1058 
1059  int x_pos_a, y_pos_a, offset;
1060 
1061  assert((x % 4) == 0);
1062  assert((N * sizeof(Ty)) % 4 == 0);
1063 
1064  // TODO : Remove intermediate 'out' matrix
1065  std::vector<std::vector<Ty>> out(M, std::vector<Ty>(N));
1066 
1067  std::lock_guard<std::mutex> lock(*mutexLock);
1068 
1069  for (int i = 0, k = 0; i < M; i++) {
1070  for (int j = 0; j < N; j++) {
1071  out[i][j] = vals[k++];
1072  }
1073  }
1074 
1075  for (int i = 0; i < M; i++) {
1076  for (int j = 0; j < N; j++) {
1077  x_pos_a = x + j * sizeof(Ty);
1078  { y_pos_a = y + i; }
1079  if ((int)x_pos_a < 0) {
1080  continue;
1081  }
1082  if ((int)y_pos_a < 0) {
1083  continue;
1084  }
1085  if ((int)(x_pos_a + sizeof(Ty)) > imgWidth) {
1086  continue;
1087  }
1088 
1089  if ((int)y_pos_a > imgHeight - 1) {
1090  continue;
1091  }
1092  offset = y_pos_a * imgWidth + x_pos_a;
1093  *((Ty *)(writeBase + offset)) = out[i][j];
1094  }
1095  }
1096 
1097  // TODO : Optimize
1098  I->cm_fence_ptr();
1099 }
1100 #endif // __SYCL_DEVICE_ONLY__
1101 
1102 // getter methods returning surface index are not available when stateless
1103 // memory accesses are enforced.
1104 #ifndef __ESIMD_FORCE_STATELESS_MEM
1105 
1106 // \brief Converts given value to a surface index.
1107 // The input must always be a result of
1108 // detail::AccessorPrivateProxy::getNativeImageObj(acc)
1109 // where acc is a buffer or image accessor. If the result is, say, 'obj', then
1110 // 'obj' is really a value of the surface index kept in a differently typed
1111 // accessor field. Front-end compilation time type of 'obj' is either
1112 // ConcreteASPtrType (detail::DecoratedType<DataT, AS>::type *), for a buffer
1113 // or
1114 // image{1,2,3}d_t OpenCL type for an image
1115 // But when doing code generation, FE replaces e.g. '__read_only image2d_t' FE
1116 // type with '%opencl.image2d_ro_t addrspace(1) *' LLVM type.
1117 // image2d_t can neither be reinterpret_cast'ed from pointer to intptr_t
// (because it is not a pointer at FE translation time), nor can it be
1119 // bit_cast'ed to intptr_t (because it is not trivially copyable). This
1120 // intrinsic takes advantage of the fact that in LLVM IR 'obj' is always a
1121 // pointer, where we can do ptr to uint32_t conversion.
1122 // This intrinsic can be called only from the device code, as
1123 // accessor => memory handle translation for host is different.
1124 // @param acc the SYCL accessor.
1125 // getNativeImageObj.
1126 // Returns the binding table index value.
1127 template <typename MemObjTy>
1128 __ESIMD_INTRIN __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
1129 #ifdef __SYCL_DEVICE_ONLY__
1130  ;
1131 #else // __SYCL_DEVICE_ONLY__
1132 {
1133  return sycl::detail::getESIMDDeviceInterface()->sycl_get_cm_surface_index_ptr(
1134  __ESIMD_DNS::AccessorPrivateProxy::getPtr(obj));
1135 }
1136 #endif // __SYCL_DEVICE_ONLY__
1137 
1138 #endif // !__ESIMD_FORCE_STATELESS_MEM
1139 
cl::sycl::ext::intel::esimd::rgba_channel_mask
rgba_channel_mask
Represents a pixel's channel mask - all possible combinations of enabled channels.
Definition: common.hpp:123
esimd_emulator_device_interface.hpp
cl::sycl::detail::getESIMDDeviceInterface
ESIMDDeviceInterface * getESIMDDeviceInterface()
Definition: esimd_emulator_device_interface.cpp:25
cl::sycl::detail::device_global_map::add
void add(const void *DeviceGlobalPtr, const char *UniqueId)
Definition: device_global_map.cpp:16
util.hpp
pi.hpp
cl::sycl::ext::intel::esimd::rgba_channel::R
@ R
cl::sycl::ext::intel::esimd::SurfaceIndex
unsigned int SurfaceIndex
Surface index type.
Definition: common.hpp:105
cl::sycl::ext::intel::esimd::get_num_channels_enabled
constexpr int get_num_channels_enabled(rgba_channel_mask M)
Definition: common.hpp:146
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
types.hpp
accessor_impl.hpp
atomic_intrin.hpp
backend_types.hpp
cl::sycl::ext::intel::esimd::detail::SLM_BTI
static constexpr SurfaceIndex SLM_BTI
Definition: common.hpp:116
common.hpp
types.hpp
cl::sycl::uint
unsigned int uint
Definition: aliases.hpp:73
cl::sycl::detail::enable_if_t
typename std::enable_if< B, T >::type enable_if_t
Definition: stl_type_traits.hpp:24
cl::sycl::ext::intel::esimd::is_channel_enabled
constexpr int is_channel_enabled(rgba_channel_mask M, rgba_channel Ch)
Definition: common.hpp:141
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12