DPC++ Runtime
Runtime libraries for oneAPI DPC++
group.hpp
Go to the documentation of this file.
1 //==-------------- group.hpp --- SYCL work group ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_ops.hpp> // for __spirv_MemoryBarrier
12 #include <CL/__spirv/spirv_types.hpp> // for Scope, __ocl_event_t
13 #include <sycl/access/access.hpp> // for decorated, mode, addr...
14 #include <sycl/detail/common.hpp> // for NDLoop, __SYCL_ASSERT
15 #include <sycl/detail/defines.hpp> // for __SYCL_TYPE
16 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEPRECATED
17 #include <sycl/detail/generic_type_traits.hpp> // for convertToOpenCLType
18 #include <sycl/detail/helpers.hpp> // for Builder, getSPIRVMemo...
19 #include <sycl/detail/item_base.hpp> // for id, range
20 #include <sycl/detail/type_traits.hpp> // for is_bool, change_base_...
21 #include <sycl/device_event.hpp> // for device_event
22 #include <sycl/exception.hpp> // for make_error_code, errc
23 #include <sycl/h_item.hpp> // for h_item
24 #include <sycl/id.hpp> // for id
25 #include <sycl/item.hpp> // for item
26 #include <sycl/memory_enums.hpp> // for memory_scope
27 #include <sycl/multi_ptr.hpp> // for multi_ptr, address_sp...
28 #include <sycl/pointers.hpp> // for decorated_global_ptr
29 #include <sycl/range.hpp> // for range
30 
31 #include <memory> // for unique_ptr
32 #include <stddef.h> // for size_t
33 #include <stdint.h> // for uint8_t, uint32_t
34 #include <type_traits> // for enable_if_t, remove_c...
35 
36 namespace sycl {
37 inline namespace _V1 {
38 namespace detail {
39 class Builder;
40 
41 // Implements a barrier accross work items within a work group.
42 inline void workGroupBarrier() {
43 #ifdef __SYCL_DEVICE_ONLY__
44  constexpr uint32_t flags =
45  static_cast<uint32_t>(
47  static_cast<uint32_t>(__spv::MemorySemanticsMask::WorkgroupMemory);
49  flags);
50 #endif // __SYCL_DEVICE_ONLY__
51 }
52 
53 } // namespace detail
54 
55 // SYCL 1.2.1rev5, section "4.8.5.3 Parallel For hierarchical invoke":
56 // Quote:
57 // ... To guarantee use of private per-work-item memory, the private_memory
58 // class can be used to wrap the data. This class very simply constructs
59 // private data for a given group across the entire group.The id of the
60 // current work-item is passed to any access to grab the correct data.
61 template <typename T, int Dimensions = 1>
62 class __SYCL_TYPE(private_memory) private_memory {
63 public:
64  // Construct based directly off the number of work-items
65  private_memory(const group<Dimensions> &G) {
66 #ifndef __SYCL_DEVICE_ONLY__
67  // serial host => one instance per work-group - allocate space for each WI
68  // in the group:
69  Val.reset(new T[G.get_local_range().size()]);
70 #endif // __SYCL_DEVICE_ONLY__
71  (void)G;
72  }
73 
74  // Access the instance for the current work-item
75  T &operator()(const h_item<Dimensions> &Id) {
76 #ifndef __SYCL_DEVICE_ONLY__
77  // Calculate the linear index of current WI and return reference to the
78  // corresponding spot in the value array:
79  size_t Ind = Id.get_physical_local().get_linear_id();
80  return Val.get()[Ind];
81 #else
82  (void)Id;
83  return Val;
84 #endif // __SYCL_DEVICE_ONLY__
85  }
86 
87 private:
88 #ifdef __SYCL_DEVICE_ONLY__
89  // On SYCL device private_memory<T> instance is created per physical WI, so
90  // there is 1:1 correspondence betwen this class instances and per-WI memory.
91  T Val;
92 #else
93  // On serial host there is one private_memory<T> instance per work group, so
94  // it must have space to hold separate value per WI in the group.
95  std::unique_ptr<T[]> Val;
96 #endif // #ifdef __SYCL_DEVICE_ONLY__
97 };
98 
103 template <int Dimensions = 1> class __SYCL_TYPE(group) group {
104 public:
105 #ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
106  using id_type = id<Dimensions>;
107  using range_type = range<Dimensions>;
108  using linear_id_type = size_t;
109  static constexpr int dimensions = Dimensions;
110 #endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
111 
112  static constexpr sycl::memory_scope fence_scope =
113  sycl::memory_scope::work_group;
114 
115  group() = delete;
116 
117  __SYCL2020_DEPRECATED("use sycl::group::get_group_id() instead")
118  id<Dimensions> get_id() const { return index; }
119 
120  __SYCL2020_DEPRECATED("use sycl::group::get_group_id() instead")
121  size_t get_id(int dimension) const { return index[dimension]; }
122 
123  id<Dimensions> get_group_id() const { return index; }
124 
125  size_t get_group_id(int dimension) const { return index[dimension]; }
126 
127  __SYCL2020_DEPRECATED("calculate sycl::group::get_group_range() * "
128  "sycl::group::get_max_local_range() instead")
129  range<Dimensions> get_global_range() const { return globalRange; }
130 
131  size_t get_global_range(int dimension) const {
132  return globalRange[dimension];
133  }
134 
135  id<Dimensions> get_local_id() const {
136 #ifdef __SYCL_DEVICE_ONLY__
137  return __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>();
138 #else
140  "get_local_id() is not implemented on host");
141 #endif
142  }
143 
144  size_t get_local_id(int dimention) const { return get_local_id()[dimention]; }
145 
146  size_t get_local_linear_id() const {
147  return get_local_linear_id_impl<Dimensions>();
148  }
149 
150  range<Dimensions> get_local_range() const { return localRange; }
151 
152  size_t get_local_range(int dimension) const { return localRange[dimension]; }
153 
154  size_t get_local_linear_range() const {
155  return get_local_linear_range_impl();
156  }
157 
158  range<Dimensions> get_group_range() const { return groupRange; }
159 
160  size_t get_group_range(int dimension) const {
161  return get_group_range()[dimension];
162  }
163 
164  size_t get_group_linear_range() const {
165  return get_group_linear_range_impl();
166  }
167 
168  range<Dimensions> get_max_local_range() const { return get_local_range(); }
169 
170  size_t operator[](int dimension) const { return index[dimension]; }
171 
172  __SYCL2020_DEPRECATED("use sycl::group::get_group_linear_id() instead")
173  size_t get_linear_id() const { return get_group_linear_id(); }
174 
175  size_t get_group_linear_id() const { return get_group_linear_id_impl(); }
176 
177  bool leader() const { return (get_local_linear_id() == 0); }
178 
179  template <typename WorkItemFunctionT>
180  void parallel_for_work_item(WorkItemFunctionT Func) const {
181  // need barriers to enforce SYCL semantics for the work item loop -
182  // compilers are expected to optimize when possible
184 #ifdef __SYCL_DEVICE_ONLY__
185  range<Dimensions> GlobalSize{
186  __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
187  range<Dimensions> LocalSize{
188  __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
189  id<Dimensions> GlobalId{
190  __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
191  id<Dimensions> LocalId{
192  __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
193 
194  // no 'iterate' in the device code variant, because
195  // (1) this code is already invoked by each work item as a part of the
196  // enclosing parallel_for_work_group kernel
197  // (2) the range this pfwi iterates over matches work group size exactly
198  item<Dimensions, false> GlobalItem =
199  detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
200  item<Dimensions, false> LocalItem =
201  detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
202  h_item<Dimensions> HItem =
203  detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
204 
205  Func(HItem);
206 #else
207  id<Dimensions> GroupStartID = index * id<Dimensions>{localRange};
208 
209  // ... host variant needs explicit 'iterate' because it is serial
211  localRange, [&](const id<Dimensions> &LocalID) {
212  item<Dimensions, false> GlobalItem =
213  detail::Builder::createItem<Dimensions, false>(
214  globalRange, GroupStartID + LocalID);
215  item<Dimensions, false> LocalItem =
216  detail::Builder::createItem<Dimensions, false>(localRange,
217  LocalID);
218  h_item<Dimensions> HItem =
219  detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
220  Func(HItem);
221  });
222 #endif // __SYCL_DEVICE_ONLY__
223  // Need both barriers here - before and after the parallel_for_work_item
224  // (PFWI). There can be work group scope code after the PFWI which reads
225  // work group local data written within this PFWI. Back Ends are expected to
226  // optimize away unneeded barriers (e.g. two barriers in a row).
228  }
229 
230  template <typename WorkItemFunctionT>
231  void parallel_for_work_item(range<Dimensions> flexibleRange,
232  WorkItemFunctionT Func) const {
234 #ifdef __SYCL_DEVICE_ONLY__
235  range<Dimensions> GlobalSize{
236  __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
237  range<Dimensions> LocalSize{
238  __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
239  id<Dimensions> GlobalId{
240  __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
241  id<Dimensions> LocalId{
242  __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
243 
244  item<Dimensions, false> GlobalItem =
245  detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
246  item<Dimensions, false> LocalItem =
247  detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
248  h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
249  GlobalItem, LocalItem, flexibleRange);
250 
251  // iterate over flexible range with work group size stride; each item
252  // performs flexibleRange/LocalSize iterations (if the former is divisible
253  // by the latter)
255  LocalId, LocalSize, flexibleRange,
256  [&](const id<Dimensions> &LogicalLocalID) {
257  HItem.setLogicalLocalID(LogicalLocalID);
258  Func(HItem);
259  });
260 #else
261  id<Dimensions> GroupStartID = index * localRange;
262 
264  localRange, [&](const id<Dimensions> &LocalID) {
265  item<Dimensions, false> GlobalItem =
266  detail::Builder::createItem<Dimensions, false>(
267  globalRange, GroupStartID + LocalID);
268  item<Dimensions, false> LocalItem =
269  detail::Builder::createItem<Dimensions, false>(localRange,
270  LocalID);
271  h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
272  GlobalItem, LocalItem, flexibleRange);
273 
275  LocalID, localRange, flexibleRange,
276  [&](const id<Dimensions> &LogicalLocalID) {
277  HItem.setLogicalLocalID(LogicalLocalID);
278  Func(HItem);
279  });
280  });
281 #endif // __SYCL_DEVICE_ONLY__
283  }
284 
287  template <access::mode accessMode = access::mode::read_write>
288  void mem_fence(
289  typename std::enable_if_t<accessMode == access::mode::read ||
290  accessMode == access::mode::write ||
291  accessMode == access::mode::read_write,
293  accessSpace = access::fence_space::global_and_local) const {
294  uint32_t flags = detail::getSPIRVMemorySemanticsMask(accessSpace);
295  // TODO: currently, there is no good way in SPIR-V to set the memory
296  // barrier only for load operations or only for store operations.
297  // The full read-and-write barrier is used and the template parameter
298  // 'accessMode' is ignored for now. Either SPIR-V or SYCL spec may be
299  // changed to address this discrepancy between SPIR-V and SYCL,
300  // or if we decide that 'accessMode' is the important feature then
301  // we can fix this later, for example, by using OpenCL 1.2 functions
302  // read_mem_fence() and write_mem_fence().
304  }
305 
311  template <typename dataT>
312  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
313  std::enable_if_t<!detail::is_bool<dataT>::value,
314  device_event> async_work_group_copy(local_ptr<dataT> dest,
315  global_ptr<dataT> src,
316  size_t numElements,
317  size_t srcStride) const {
320  detail::convertToOpenCLType(src), numElements, srcStride, 0);
321  return device_event(E);
322  }
323 
329  template <typename dataT>
330  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
331  std::enable_if_t<!detail::is_bool<dataT>::value,
332  device_event> async_work_group_copy(global_ptr<dataT> dest,
333  local_ptr<dataT> src,
334  size_t numElements,
335  size_t destStride)
336  const {
339  detail::convertToOpenCLType(src), numElements, destStride, 0);
340  return device_event(E);
341  }
342 
349  template <typename DestDataT, typename SrcDataT>
350  std::enable_if_t<!detail::is_bool<DestDataT>::value &&
351  std::is_same_v<std::remove_const_t<SrcDataT>, DestDataT>,
352  device_event>
353  async_work_group_copy(decorated_local_ptr<DestDataT> dest,
354  decorated_global_ptr<SrcDataT> src, size_t numElements,
355  size_t srcStride) const {
358  detail::convertToOpenCLType(src), numElements, srcStride, 0);
359  return device_event(E);
360  }
361 
368  template <typename DestDataT, typename SrcDataT>
369  std::enable_if_t<!detail::is_bool<DestDataT>::value &&
370  std::is_same_v<std::remove_const_t<SrcDataT>, DestDataT>,
371  device_event>
372  async_work_group_copy(decorated_global_ptr<DestDataT> dest,
373  decorated_local_ptr<SrcDataT> src, size_t numElements,
374  size_t destStride) const {
377  detail::convertToOpenCLType(src), numElements, destStride, 0);
378  return device_event(E);
379  }
380 
386  template <typename T, access::address_space DestS, access::address_space SrcS>
387  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
388  std::enable_if_t<
389  detail::is_scalar_bool<T>::value,
390  device_event> async_work_group_copy(multi_ptr<T, DestS,
391  access::decorated::legacy>
392  Dest,
393  multi_ptr<T, SrcS,
394  access::decorated::legacy>
395  Src,
396  size_t NumElements,
397  size_t Stride) const {
398  static_assert(sizeof(bool) == sizeof(uint8_t),
399  "Async copy to/from bool memory is not supported.");
400  auto DestP = multi_ptr<uint8_t, DestS, access::decorated::legacy>(
401  reinterpret_cast<uint8_t *>(Dest.get()));
402  auto SrcP = multi_ptr<uint8_t, SrcS, access::decorated::legacy>(
403  reinterpret_cast<uint8_t *>(Src.get()));
404  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
405  }
406 
412  template <typename T, access::address_space DestS, access::address_space SrcS>
413  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
414  std::enable_if_t<
415  detail::is_vector_bool<T>::value,
416  device_event> async_work_group_copy(multi_ptr<T, DestS,
417  access::decorated::legacy>
418  Dest,
419  multi_ptr<T, SrcS,
420  access::decorated::legacy>
421  Src,
422  size_t NumElements,
423  size_t Stride) const {
424  static_assert(sizeof(bool) == sizeof(uint8_t),
425  "Async copy to/from bool memory is not supported.");
426  using VecT = detail::change_base_type_t<T, uint8_t>;
427  auto DestP = address_space_cast<DestS, access::decorated::legacy>(
428  reinterpret_cast<VecT *>(Dest.get()));
429  auto SrcP = address_space_cast<SrcS, access::decorated::legacy>(
430  reinterpret_cast<VecT *>(Src.get()));
431  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
432  }
433 
439  template <typename DestT, access::address_space DestS, typename SrcT,
441  std::enable_if_t<detail::is_scalar_bool<DestT>::value &&
442  std::is_same_v<std::remove_const_t<SrcT>, DestT>,
443  device_event>
444  async_work_group_copy(multi_ptr<DestT, DestS, access::decorated::yes> Dest,
445  multi_ptr<SrcT, SrcS, access::decorated::yes> Src,
446  size_t NumElements, size_t Stride) const {
447  static_assert(sizeof(bool) == sizeof(uint8_t),
448  "Async copy to/from bool memory is not supported.");
449  using QualSrcT =
450  std::conditional_t<std::is_const_v<SrcT>, const uint8_t, uint8_t>;
451  auto DestP = multi_ptr<uint8_t, DestS, access::decorated::yes>(
452  detail::cast_AS<typename multi_ptr<uint8_t, DestS,
454  Dest.get_decorated()));
455  auto SrcP = multi_ptr<QualSrcT, SrcS, access::decorated::yes>(
456  detail::cast_AS<typename multi_ptr<QualSrcT, SrcS,
458  Src.get_decorated()));
459  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
460  }
461 
467  template <typename DestT, access::address_space DestS, typename SrcT,
469  std::enable_if_t<detail::is_vector_bool<DestT>::value &&
470  std::is_same_v<std::remove_const_t<SrcT>, DestT>,
471  device_event>
472  async_work_group_copy(multi_ptr<DestT, DestS, access::decorated::yes> Dest,
473  multi_ptr<SrcT, SrcS, access::decorated::yes> Src,
474  size_t NumElements, size_t Stride) const {
475  static_assert(sizeof(bool) == sizeof(uint8_t),
476  "Async copy to/from bool memory is not supported.");
477  using VecT = detail::change_base_type_t<DestT, uint8_t>;
478  using QualSrcVecT =
479  std::conditional_t<std::is_const_v<SrcT>, std::add_const_t<VecT>, VecT>;
480  auto DestP = multi_ptr<VecT, DestS, access::decorated::yes>(
483  Dest.get_decorated()));
484  auto SrcP = multi_ptr<QualSrcVecT, SrcS, access::decorated::yes>(
485  detail::cast_AS<typename multi_ptr<QualSrcVecT, SrcS,
487  Src.get_decorated()));
488  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
489  }
490 
496  template <typename dataT>
497  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
498  device_event
499  async_work_group_copy(local_ptr<dataT> dest, global_ptr<dataT> src,
500  size_t numElements) const {
501  return async_work_group_copy(dest, src, numElements, 1);
502  }
503 
509  template <typename dataT>
510  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
511  device_event
512  async_work_group_copy(global_ptr<dataT> dest, local_ptr<dataT> src,
513  size_t numElements) const {
514  return async_work_group_copy(dest, src, numElements, 1);
515  }
516 
523  template <typename DestDataT, typename SrcDataT>
524  typename std::enable_if_t<
525  std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>, device_event>
526  async_work_group_copy(decorated_local_ptr<DestDataT> dest,
527  decorated_global_ptr<SrcDataT> src,
528  size_t numElements) const {
529  return async_work_group_copy(dest, src, numElements, 1);
530  }
531 
538  template <typename DestDataT, typename SrcDataT>
539  typename std::enable_if_t<
540  std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>, device_event>
541  async_work_group_copy(decorated_global_ptr<DestDataT> dest,
542  decorated_local_ptr<SrcDataT> src,
543  size_t numElements) const {
544  return async_work_group_copy(dest, src, numElements, 1);
545  }
546 
547  template <typename... eventTN> void wait_for(eventTN... Events) const {
548  waitForHelper(Events...);
549  }
550 
551  bool operator==(const group<Dimensions> &rhs) const {
552  bool Result = (rhs.globalRange == globalRange) &&
553  (rhs.localRange == localRange) && (rhs.index == index);
554  __SYCL_ASSERT(rhs.groupRange == groupRange &&
555  "inconsistent group class fields");
556  return Result;
557  }
558 
559  bool operator!=(const group<Dimensions> &rhs) const {
560  return !((*this) == rhs);
561  }
562 
563 private:
564  range<Dimensions> globalRange;
565  range<Dimensions> localRange;
566  range<Dimensions> groupRange;
567  id<Dimensions> index;
568 
569  template <int dims = Dimensions>
570  typename std::enable_if_t<(dims == 1), size_t>
571  get_local_linear_id_impl() const {
572  id<Dimensions> localId = get_local_id();
573  return localId[0];
574  }
575 
576  template <int dims = Dimensions>
577  typename std::enable_if_t<(dims == 2), size_t>
578  get_local_linear_id_impl() const {
579  id<Dimensions> localId = get_local_id();
580  return localId[0] * localRange[1] + localId[1];
581  }
582 
583  template <int dims = Dimensions>
584  typename std::enable_if_t<(dims == 3), size_t>
585  get_local_linear_id_impl() const {
586  id<Dimensions> localId = get_local_id();
587  return (localId[0] * localRange[1] * localRange[2]) +
588  (localId[1] * localRange[2]) + localId[2];
589  }
590 
591  template <int dims = Dimensions>
592  typename std::enable_if_t<(dims == 1), size_t>
593  get_local_linear_range_impl() const {
594  auto localRange = get_local_range();
595  return localRange[0];
596  }
597 
598  template <int dims = Dimensions>
599  typename std::enable_if_t<(dims == 2), size_t>
600  get_local_linear_range_impl() const {
601  auto localRange = get_local_range();
602  return localRange[0] * localRange[1];
603  }
604 
605  template <int dims = Dimensions>
606  typename std::enable_if_t<(dims == 3), size_t>
607  get_local_linear_range_impl() const {
608  auto localRange = get_local_range();
609  return localRange[0] * localRange[1] * localRange[2];
610  }
611 
612  template <int dims = Dimensions>
613  typename std::enable_if_t<(dims == 1), size_t>
614  get_group_linear_range_impl() const {
615  auto groupRange = get_group_range();
616  return groupRange[0];
617  }
618 
619  template <int dims = Dimensions>
620  typename std::enable_if_t<(dims == 2), size_t>
621  get_group_linear_range_impl() const {
622  auto groupRange = get_group_range();
623  return groupRange[0] * groupRange[1];
624  }
625 
626  template <int dims = Dimensions>
627  typename std::enable_if_t<(dims == 3), size_t>
628  get_group_linear_range_impl() const {
629  auto groupRange = get_group_range();
630  return groupRange[0] * groupRange[1] * groupRange[2];
631  }
632 
633  template <int dims = Dimensions>
634  typename std::enable_if_t<(dims == 1), size_t>
635  get_group_linear_id_impl() const {
636  return index[0];
637  }
638 
639  template <int dims = Dimensions>
640  typename std::enable_if_t<(dims == 2), size_t>
641  get_group_linear_id_impl() const {
642  return index[0] * groupRange[1] + index[1];
643  }
644 
645  // SYCL specification 1.2.1rev5, section 4.7.6.5 "Buffer accessor":
646  // Whenever a multi-dimensional index is passed to a SYCL accessor the
647  // linear index is calculated based on the index {id1, id2, id3} provided
648  // and the range of the SYCL accessor {r1, r2, r3} according to row-major
649  // ordering as follows:
650  // id3 + (id2 · r3) + (id1 · r3 · r2) (4.3)
651  // section 4.8.1.8 "group class":
652  // size_t get_linear_id()const
653  // Get a linearized version of the work-group id. Calculating a linear
654  // work-group id from a multi-dimensional index follows the equation 4.3.
655  template <int dims = Dimensions>
656  typename std::enable_if_t<(dims == 3), size_t>
657  get_group_linear_id_impl() const {
658  return (index[0] * groupRange[1] * groupRange[2]) +
659  (index[1] * groupRange[2]) + index[2];
660  }
661 
662  void waitForHelper() const {}
663 
664  void waitForHelper(device_event Event) const { Event.wait(); }
665 
666  template <typename T, typename... Ts>
667  void waitForHelper(T E, Ts... Es) const {
668  waitForHelper(E);
669  waitForHelper(Es...);
670  }
671 
672 protected:
673  friend class detail::Builder;
674  group(const range<Dimensions> &G, const range<Dimensions> &L,
675  const range<Dimensions> GroupRange, const id<Dimensions> &I)
676  : globalRange(G), localRange(L), groupRange(GroupRange), index(I) {
677  // Make sure local range divides global without remainder:
678  __SYCL_ASSERT(((G % L).size() == 0) &&
679  "global range is not multiple of local");
680  __SYCL_ASSERT((((G / L) - GroupRange).size() == 0) &&
681  "inconsistent group constructor arguments");
682  }
683 };
684 } // namespace _V1
685 } // namespace sycl
#define __SYCL_ASSERT(x)
Definition: common.hpp:166
ToT cast_AS(FromT from)
Definition: access.hpp:330
auto get_local_linear_id(Group g)
auto get_local_linear_range(Group g)
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:198
void workGroupBarrier()
Definition: group.hpp:42
fence_scope
The scope that fence() operation should apply to.
Definition: common.hpp:350
@ group
Wait until all previous memory transactions from this thread are observed within the local thread-gro...
bool operator==(const cache_config &lhs, const cache_config &rhs)
bool operator!=(const cache_config &lhs, const cache_config &rhs)
T & operator[](std::ptrdiff_t idx) const noexcept
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
Definition: accessor.hpp:3233
signed char __SYCL2020_DEPRECATED
Definition: aliases.hpp:94
multi_ptr< ElementType, access::address_space::global_space, IsDecorated > global_ptr
Definition: pointers.hpp:29
std::conditional_t< is_decorated, decorated_type *, std::add_pointer_t< value_type > > pointer
Definition: multi_ptr.hpp:459
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:87
multi_ptr< ElementType, access::address_space::local_space, IsDecorated > local_ptr
Definition: pointers.hpp:34
Definition: access.hpp:18
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1295
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:36
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1307
void * __ocl_event_t
static __SYCL_ALWAYS_INLINE void iterate(const LoopBoundTy< NDims > &UpperBound, FuncTy f)
Generates ND loop nest with {0,..0} .
Definition: common.hpp:325