DPC++ Runtime
Runtime libraries for oneAPI DPC++
group.hpp
Go to the documentation of this file.
1 //==-------------- group.hpp --- SYCL work group ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_ops.hpp>
14 #include <stdexcept>
15 #include <sycl/detail/common.hpp>
17 #include <sycl/detail/helpers.hpp>
18 #include <sycl/detail/spirv.hpp>
19 #include <sycl/device_event.hpp>
20 #include <sycl/h_item.hpp>
21 #include <sycl/id.hpp>
22 #include <sycl/memory_enums.hpp>
23 #include <sycl/pointers.hpp>
24 #include <sycl/range.hpp>
25 #include <type_traits>
26 
27 namespace sycl {
namespace detail {
class Builder;

// Implements a barrier across work items within a work group.
//
// On device this lowers to a SPIR-V OpControlBarrier with work-group
// execution/memory scope and sequentially-consistent + workgroup-memory
// semantics; on host it is a no-op (host execution of hierarchical
// parallelism is serial, so no synchronization is required).
static inline void workGroupBarrier() {
#ifdef __SYCL_DEVICE_ONLY__
  constexpr uint32_t flags =
      static_cast<uint32_t>(
          __spv::MemorySemanticsMask::SequentiallyConsistent) |
      static_cast<uint32_t>(__spv::MemorySemanticsMask::WorkgroupMemory);
  __spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup,
                         flags);
#endif // __SYCL_DEVICE_ONLY__
}

} // namespace detail
45 
46 // SYCL 1.2.1rev5, section "4.8.5.3 Parallel For hierarchical invoke":
47 // Quote:
48 // ... To guarantee use of private per-work-item memory, the private_memory
49 // class can be used to wrap the data. This class very simply constructs
50 // private data for a given group across the entire group.The id of the
51 // current work-item is passed to any access to grab the correct data.
52 template <typename T, int Dimensions = 1>
53 class __SYCL_TYPE(private_memory) private_memory {
54 public:
55  // Construct based directly off the number of work-items
56  private_memory(const group<Dimensions> &G) {
57 #ifndef __SYCL_DEVICE_ONLY__
58  // serial host => one instance per work-group - allocate space for each WI
59  // in the group:
60  Val.reset(new T[G.get_local_range().size()]);
61 #endif // __SYCL_DEVICE_ONLY__
62  (void)G;
63  }
64 
65  // Access the instance for the current work-item
66  T &operator()(const h_item<Dimensions> &Id) {
67 #ifndef __SYCL_DEVICE_ONLY__
68  // Calculate the linear index of current WI and return reference to the
69  // corresponding spot in the value array:
70  size_t Ind = Id.get_physical_local().get_linear_id();
71  return Val.get()[Ind];
72 #else
73  (void)Id;
74  return Val;
75 #endif // __SYCL_DEVICE_ONLY__
76  }
77 
78 private:
79 #ifdef __SYCL_DEVICE_ONLY__
80  // On SYCL device private_memory<T> instance is created per physical WI, so
81  // there is 1:1 correspondence betwen this class instances and per-WI memory.
82  T Val;
83 #else
84  // On serial host there is one private_memory<T> instance per work group, so
85  // it must have space to hold separate value per WI in the group.
86  std::unique_ptr<T[]> Val;
87 #endif // #ifdef __SYCL_DEVICE_ONLY__
88 };
89 
94 template <int Dimensions = 1> class __SYCL_TYPE(group) group {
95 public:
96 #ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
97  using id_type = id<Dimensions>;
98  using range_type = range<Dimensions>;
99  using linear_id_type = size_t;
100  static constexpr int dimensions = Dimensions;
101 #endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
102 
103  static constexpr sycl::memory_scope fence_scope =
104  sycl::memory_scope::work_group;
105 
106  group() = delete;
107 
108  __SYCL2020_DEPRECATED("use sycl::group::get_group_id() instead")
109  id<Dimensions> get_id() const { return index; }
110 
111  __SYCL2020_DEPRECATED("use sycl::group::get_group_id() instead")
112  size_t get_id(int dimension) const { return index[dimension]; }
113 
114  id<Dimensions> get_group_id() const { return index; }
115 
116  size_t get_group_id(int dimension) const { return index[dimension]; }
117 
118  __SYCL2020_DEPRECATED("calculate sycl::group::get_group_range() * "
119  "sycl::group::get_max_local_range() instead")
120  range<Dimensions> get_global_range() const { return globalRange; }
121 
122  size_t get_global_range(int dimension) const {
123  return globalRange[dimension];
124  }
125 
126  id<Dimensions> get_local_id() const {
127 #ifdef __SYCL_DEVICE_ONLY__
128  return __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>();
129 #else
130  throw runtime_error("get_local_id() is not implemented on host device",
131  PI_ERROR_INVALID_DEVICE);
132  // Implementing get_local_id() on host device requires ABI breaking change.
133  // It requires extending class group with local item which represents
134  // local_id. Currently this local id is only used in nd_item and group
135  // cannot access it.
136 #endif
137  }
138 
139  size_t get_local_id(int dimention) const { return get_local_id()[dimention]; }
140 
141  size_t get_local_linear_id() const {
142  return get_local_linear_id_impl<Dimensions>();
143  }
144 
145  range<Dimensions> get_local_range() const { return localRange; }
146 
147  size_t get_local_range(int dimension) const { return localRange[dimension]; }
148 
149  size_t get_local_linear_range() const {
150  return get_local_linear_range_impl();
151  }
152 
153  range<Dimensions> get_group_range() const { return groupRange; }
154 
155  size_t get_group_range(int dimension) const {
156  return get_group_range()[dimension];
157  }
158 
159  size_t get_group_linear_range() const {
160  return get_group_linear_range_impl();
161  }
162 
163  range<Dimensions> get_max_local_range() const { return get_local_range(); }
164 
165  size_t operator[](int dimension) const { return index[dimension]; }
166 
167  __SYCL2020_DEPRECATED("use sycl::group::get_group_linear_id() instead")
168  size_t get_linear_id() const { return get_group_linear_id(); }
169 
170  size_t get_group_linear_id() const { return get_group_linear_id_impl(); }
171 
172  bool leader() const { return (get_local_linear_id() == 0); }
173 
174  template <typename WorkItemFunctionT>
175  void parallel_for_work_item(WorkItemFunctionT Func) const {
176  // need barriers to enforce SYCL semantics for the work item loop -
177  // compilers are expected to optimize when possible
179 #ifdef __SYCL_DEVICE_ONLY__
180  range<Dimensions> GlobalSize{
181  __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
182  range<Dimensions> LocalSize{
183  __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
184  id<Dimensions> GlobalId{
185  __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
186  id<Dimensions> LocalId{
187  __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
188 
189  // no 'iterate' in the device code variant, because
190  // (1) this code is already invoked by each work item as a part of the
191  // enclosing parallel_for_work_group kernel
192  // (2) the range this pfwi iterates over matches work group size exactly
193  item<Dimensions, false> GlobalItem =
194  detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
195  item<Dimensions, false> LocalItem =
196  detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
197  h_item<Dimensions> HItem =
198  detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
199 
200  Func(HItem);
201 #else
202  id<Dimensions> GroupStartID = index * localRange;
203 
204  // ... host variant needs explicit 'iterate' because it is serial
205  detail::NDLoop<Dimensions>::iterate(
206  localRange, [&](const id<Dimensions> &LocalID) {
207  item<Dimensions, false> GlobalItem =
208  detail::Builder::createItem<Dimensions, false>(
209  globalRange, GroupStartID + LocalID);
210  item<Dimensions, false> LocalItem =
211  detail::Builder::createItem<Dimensions, false>(localRange,
212  LocalID);
213  h_item<Dimensions> HItem =
214  detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
215  Func(HItem);
216  });
217 #endif // __SYCL_DEVICE_ONLY__
218  // Need both barriers here - before and after the parallel_for_work_item
219  // (PFWI). There can be work group scope code after the PFWI which reads
220  // work group local data written within this PFWI. Back Ends are expected to
221  // optimize away unneeded barriers (e.g. two barriers in a row).
223  }
224 
225  template <typename WorkItemFunctionT>
226  void parallel_for_work_item(range<Dimensions> flexibleRange,
227  WorkItemFunctionT Func) const {
229 #ifdef __SYCL_DEVICE_ONLY__
230  range<Dimensions> GlobalSize{
231  __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
232  range<Dimensions> LocalSize{
233  __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
234  id<Dimensions> GlobalId{
235  __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
236  id<Dimensions> LocalId{
237  __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
238 
239  item<Dimensions, false> GlobalItem =
240  detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
241  item<Dimensions, false> LocalItem =
242  detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
243  h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
244  GlobalItem, LocalItem, flexibleRange);
245 
246  // iterate over flexible range with work group size stride; each item
247  // performs flexibleRange/LocalSize iterations (if the former is divisible
248  // by the latter)
249  detail::NDLoop<Dimensions>::iterate(
250  LocalId, LocalSize, flexibleRange,
251  [&](const id<Dimensions> &LogicalLocalID) {
252  HItem.setLogicalLocalID(LogicalLocalID);
253  Func(HItem);
254  });
255 #else
256  id<Dimensions> GroupStartID = index * localRange;
257 
258  detail::NDLoop<Dimensions>::iterate(
259  localRange, [&](const id<Dimensions> &LocalID) {
260  item<Dimensions, false> GlobalItem =
261  detail::Builder::createItem<Dimensions, false>(
262  globalRange, GroupStartID + LocalID);
263  item<Dimensions, false> LocalItem =
264  detail::Builder::createItem<Dimensions, false>(localRange,
265  LocalID);
266  h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
267  GlobalItem, LocalItem, flexibleRange);
268 
269  detail::NDLoop<Dimensions>::iterate(
270  LocalID, localRange, flexibleRange,
271  [&](const id<Dimensions> &LogicalLocalID) {
272  HItem.setLogicalLocalID(LogicalLocalID);
273  Func(HItem);
274  });
275  });
276 #endif // __SYCL_DEVICE_ONLY__
278  }
279 
282  template <access::mode accessMode = access::mode::read_write>
283  void mem_fence(
284  typename detail::enable_if_t<accessMode == access::mode::read ||
285  accessMode == access::mode::write ||
286  accessMode == access::mode::read_write,
288  accessSpace = access::fence_space::global_and_local) const {
289  uint32_t flags = detail::getSPIRVMemorySemanticsMask(accessSpace);
290  // TODO: currently, there is no good way in SPIR-V to set the memory
291  // barrier only for load operations or only for store operations.
292  // The full read-and-write barrier is used and the template parameter
293  // 'accessMode' is ignored for now. Either SPIR-V or SYCL spec may be
294  // changed to address this discrepancy between SPIR-V and SYCL,
295  // or if we decide that 'accessMode' is the important feature then
296  // we can fix this later, for example, by using OpenCL 1.2 functions
297  // read_mem_fence() and write_mem_fence().
299  }
300 
306  template <typename dataT>
307  detail::enable_if_t<!detail::is_bool<dataT>::value, device_event>
308  async_work_group_copy(local_ptr<dataT> dest, global_ptr<dataT> src,
309  size_t numElements, size_t srcStride) const {
310  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
311  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
312 
314  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
315  numElements, srcStride, 0);
316  return device_event(E);
317  }
318 
324  template <typename dataT>
325  detail::enable_if_t<!detail::is_bool<dataT>::value, device_event>
326  async_work_group_copy(global_ptr<dataT> dest, local_ptr<dataT> src,
327  size_t numElements, size_t destStride) const {
328  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
329  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
330 
332  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
333  numElements, destStride, 0);
334  return device_event(E);
335  }
336 
342  template <typename T, access::address_space DestS, access::address_space SrcS,
343  access::decorated DestIsDecorated, access::decorated SrcIsDecorated>
344  detail::enable_if_t<detail::is_scalar_bool<T>::value, device_event>
345  async_work_group_copy(multi_ptr<T, DestS, DestIsDecorated> Dest,
346  multi_ptr<T, SrcS, SrcIsDecorated> Src,
347  size_t NumElements, size_t Stride) const {
348  static_assert(sizeof(bool) == sizeof(uint8_t),
349  "Async copy to/from bool memory is not supported.");
350  auto DestP = multi_ptr<uint8_t, DestS, DestIsDecorated>(
351  reinterpret_cast<uint8_t *>(Dest.get()));
352  auto SrcP = multi_ptr<uint8_t, SrcS, SrcIsDecorated>(
353  reinterpret_cast<uint8_t *>(Src.get()));
354  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
355  }
356 
362  template <typename T, access::address_space DestS, access::address_space SrcS,
363  access::decorated DestIsDecorated, access::decorated SrcIsDecorated>
364  detail::enable_if_t<detail::is_vector_bool<T>::value, device_event>
365  async_work_group_copy(multi_ptr<T, DestS, DestIsDecorated> Dest,
366  multi_ptr<T, SrcS, SrcIsDecorated> Src,
367  size_t NumElements, size_t Stride) const {
368  static_assert(sizeof(bool) == sizeof(uint8_t),
369  "Async copy to/from bool memory is not supported.");
370  using VecT = detail::change_base_type_t<T, uint8_t>;
371  auto DestP = address_space_cast<DestS, DestIsDecorated>(
372  reinterpret_cast<VecT *>(Dest.get()));
373  auto SrcP = address_space_cast<SrcS, SrcIsDecorated>(
374  reinterpret_cast<VecT *>(Src.get()));
375  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
376  }
377 
383  template <typename dataT>
384  device_event async_work_group_copy(local_ptr<dataT> dest,
385  global_ptr<dataT> src,
386  size_t numElements) const {
387  return async_work_group_copy(dest, src, numElements, 1);
388  }
389 
395  template <typename dataT>
396  device_event async_work_group_copy(global_ptr<dataT> dest,
397  local_ptr<dataT> src,
398  size_t numElements) const {
399  return async_work_group_copy(dest, src, numElements, 1);
400  }
401 
402  template <typename... eventTN> void wait_for(eventTN... Events) const {
403  waitForHelper(Events...);
404  }
405 
406  bool operator==(const group<Dimensions> &rhs) const {
407  bool Result = (rhs.globalRange == globalRange) &&
408  (rhs.localRange == localRange) && (rhs.index == index);
409  __SYCL_ASSERT(rhs.groupRange == groupRange &&
410  "inconsistent group class fields");
411  return Result;
412  }
413 
414  bool operator!=(const group<Dimensions> &rhs) const {
415  return !((*this) == rhs);
416  }
417 
418 private:
419  range<Dimensions> globalRange;
420  range<Dimensions> localRange;
421  range<Dimensions> groupRange;
422  id<Dimensions> index;
423 
424  template <int dims = Dimensions>
425  typename detail::enable_if_t<(dims == 1), size_t>
426  get_local_linear_id_impl() const {
427  id<Dimensions> localId = get_local_id();
428  return localId[0];
429  }
430 
431  template <int dims = Dimensions>
432  typename detail::enable_if_t<(dims == 2), size_t>
433  get_local_linear_id_impl() const {
434  id<Dimensions> localId = get_local_id();
435  return localId[0] * localRange[1] + localId[1];
436  }
437 
438  template <int dims = Dimensions>
439  typename detail::enable_if_t<(dims == 3), size_t>
440  get_local_linear_id_impl() const {
441  id<Dimensions> localId = get_local_id();
442  return (localId[0] * localRange[1] * localRange[2]) +
443  (localId[1] * localRange[2]) + localId[2];
444  }
445 
446  template <int dims = Dimensions>
447  typename detail::enable_if_t<(dims == 1), size_t>
448  get_local_linear_range_impl() const {
449  auto localRange = get_local_range();
450  return localRange[0];
451  }
452 
453  template <int dims = Dimensions>
454  typename detail::enable_if_t<(dims == 2), size_t>
455  get_local_linear_range_impl() const {
456  auto localRange = get_local_range();
457  return localRange[0] * localRange[1];
458  }
459 
460  template <int dims = Dimensions>
461  typename detail::enable_if_t<(dims == 3), size_t>
462  get_local_linear_range_impl() const {
463  auto localRange = get_local_range();
464  return localRange[0] * localRange[1] * localRange[2];
465  }
466 
467  template <int dims = Dimensions>
468  typename detail::enable_if_t<(dims == 1), size_t>
469  get_group_linear_range_impl() const {
470  auto groupRange = get_group_range();
471  return groupRange[0];
472  }
473 
474  template <int dims = Dimensions>
475  typename detail::enable_if_t<(dims == 2), size_t>
476  get_group_linear_range_impl() const {
477  auto groupRange = get_group_range();
478  return groupRange[0] * groupRange[1];
479  }
480 
481  template <int dims = Dimensions>
482  typename detail::enable_if_t<(dims == 3), size_t>
483  get_group_linear_range_impl() const {
484  auto groupRange = get_group_range();
485  return groupRange[0] * groupRange[1] * groupRange[2];
486  }
487 
488  template <int dims = Dimensions>
489  typename detail::enable_if_t<(dims == 1), size_t>
490  get_group_linear_id_impl() const {
491  return index[0];
492  }
493 
494  template <int dims = Dimensions>
495  typename detail::enable_if_t<(dims == 2), size_t>
496  get_group_linear_id_impl() const {
497  return index[0] * groupRange[1] + index[1];
498  }
499 
500  // SYCL specification 1.2.1rev5, section 4.7.6.5 "Buffer accessor":
501  // Whenever a multi-dimensional index is passed to a SYCL accessor the
502  // linear index is calculated based on the index {id1, id2, id3} provided
503  // and the range of the SYCL accessor {r1, r2, r3} according to row-major
504  // ordering as follows:
505  // id3 + (id2 · r3) + (id1 · r3 · r2) (4.3)
506  // section 4.8.1.8 "group class":
507  // size_t get_linear_id()const
508  // Get a linearized version of the work-group id. Calculating a linear
509  // work-group id from a multi-dimensional index follows the equation 4.3.
510  template <int dims = Dimensions>
511  typename detail::enable_if_t<(dims == 3), size_t>
512  get_group_linear_id_impl() const {
513  return (index[0] * groupRange[1] * groupRange[2]) +
514  (index[1] * groupRange[2]) + index[2];
515  }
516 
517  void waitForHelper() const {}
518 
519  void waitForHelper(device_event Event) const { Event.wait(); }
520 
521  template <typename T, typename... Ts>
522  void waitForHelper(T E, Ts... Es) const {
523  waitForHelper(E);
524  waitForHelper(Es...);
525  }
526 
527 protected:
528  friend class detail::Builder;
529  group(const range<Dimensions> &G, const range<Dimensions> &L,
530  const range<Dimensions> GroupRange, const id<Dimensions> &I)
531  : globalRange(G), localRange(L), groupRange(GroupRange), index(I) {
532  // Make sure local range divides global without remainder:
533  __SYCL_ASSERT(((G % L).size() == 0) &&
534  "global range is not multiple of local");
535  __SYCL_ASSERT((((G / L) - GroupRange).size() == 0) &&
536  "inconsistent group constructor arguments");
537  }
538 };
539 
// Deprecated free-function accessor for the current work-group.
// On device it materializes the implicit group object via the Builder;
// on host there is no implicit group available, so it always throws
// sycl::exception with errc::feature_not_supported.
template <int Dims>
__SYCL_DEPRECATED("use sycl::ext::oneapi::experimental::this_group() instead")
group<Dims> this_group() {
#ifdef __SYCL_DEVICE_ONLY__
  return detail::Builder::getElement(detail::declptr<group<Dims>>());
#else
  throw sycl::exception(
      sycl::make_error_code(sycl::errc::feature_not_supported),
      "Free function calls are not supported on host device");
#endif
}
551 
552 namespace ext::oneapi::experimental {
553 template <int Dims> group<Dims> this_group() {
554 #ifdef __SYCL_DEVICE_ONLY__
555  return sycl::detail::Builder::getElement(
557 #else
558  throw sycl::exception(
559  sycl::make_error_code(sycl::errc::feature_not_supported),
560  "Free function calls are not supported on host device");
561 #endif
562 }
563 } // namespace ext::oneapi::experimental
564 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
565 } // namespace sycl
#define __SYCL_ASSERT(x)
Definition: common.hpp:197
#define __SYCL_INLINE_VER_NAMESPACE(X)
#define __SYCL_DEPRECATED(message)
#define __SYCL2020_DEPRECATED(message)
conditional_t< TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::type, conditional_t< TryToGetPointerT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetPointerVecT< SelectMatchingOpenCLType_t< T > >::type, SelectMatchingOpenCLType_t< T > >> ConvertToOpenCLType_t
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:199
typename std::enable_if< B, T >::type enable_if_t
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
Definition: stream.hpp:111
size_t get_local_linear_range(Group g)
static void workGroupBarrier()
Definition: group.hpp:33
Group::linear_id_type get_local_linear_id(Group g)
constexpr std::enable_if_t< detail::IsCompileTimeProperty< PropertyT >::value, bool > operator!=(const property_value< PropertyT, A... > &, const property_value< PropertyT, B... > &)
T & operator[](std::ptrdiff_t idx) const noexcept
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
Definition: accessor.hpp:2744
constexpr mode_tag_t< access_mode::read_write > read_write
Definition: access.hpp:73
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:91
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1030
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:36
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1042
void * __ocl_event_t
bool operator==(const Slab &Lhs, const Slab &Rhs)