DPC++ Runtime
Runtime libraries for oneAPI DPC++
group.hpp
Go to the documentation of this file.
1 //==-------------- group.hpp --- SYCL work group ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_ops.hpp> // for __spirv_MemoryBarrier
12 #include <CL/__spirv/spirv_types.hpp> // for Scope, __ocl_event_t
13 #include <sycl/access/access.hpp> // for decorated, mode, addr...
14 #include <sycl/detail/common.hpp> // for NDLoop, __SYCL_ASSERT
15 #include <sycl/detail/defines.hpp> // for __SYCL_TYPE
16 #include <sycl/detail/defines_elementary.hpp> // for __SYCL2020_DEPRECATED
17 #include <sycl/detail/generic_type_traits.hpp> // for ConvertToOpenCLType_t
18 #include <sycl/detail/helpers.hpp> // for Builder, getSPIRVMemo...
19 #include <sycl/detail/item_base.hpp> // for id, range
20 #include <sycl/detail/type_traits.hpp> // for is_bool, change_base_...
21 #include <sycl/device_event.hpp> // for device_event
22 #include <sycl/exception.hpp> // for make_error_code, errc
23 #include <sycl/h_item.hpp> // for h_item
24 #include <sycl/id.hpp> // for id
25 #include <sycl/item.hpp> // for item
26 #include <sycl/memory_enums.hpp> // for memory_scope
27 #include <sycl/multi_ptr.hpp> // for multi_ptr, address_sp...
28 #include <sycl/pointers.hpp> // for decorated_global_ptr
29 #include <sycl/range.hpp> // for range
30 
31 #include <memory> // for unique_ptr
32 #include <stddef.h> // for size_t
33 #include <stdint.h> // for uint8_t, uint32_t
34 #include <type_traits> // for enable_if_t, remove_c...
35 
36 namespace sycl {
37 inline namespace _V1 {
38 namespace detail {
39 class Builder;
40 
41 // Implements a barrier accross work items within a work group.
42 inline void workGroupBarrier() {
43 #ifdef __SYCL_DEVICE_ONLY__
44  constexpr uint32_t flags =
45  static_cast<uint32_t>(
47  static_cast<uint32_t>(__spv::MemorySemanticsMask::WorkgroupMemory);
49  flags);
50 #endif // __SYCL_DEVICE_ONLY__
51 }
52 
53 } // namespace detail
54 
55 // SYCL 1.2.1rev5, section "4.8.5.3 Parallel For hierarchical invoke":
56 // Quote:
57 // ... To guarantee use of private per-work-item memory, the private_memory
58 // class can be used to wrap the data. This class very simply constructs
59 // private data for a given group across the entire group.The id of the
60 // current work-item is passed to any access to grab the correct data.
61 template <typename T, int Dimensions = 1>
62 class __SYCL_TYPE(private_memory) private_memory {
63 public:
64  // Construct based directly off the number of work-items
65  private_memory(const group<Dimensions> &G) {
66 #ifndef __SYCL_DEVICE_ONLY__
67  // serial host => one instance per work-group - allocate space for each WI
68  // in the group:
69  Val.reset(new T[G.get_local_range().size()]);
70 #endif // __SYCL_DEVICE_ONLY__
71  (void)G;
72  }
73 
74  // Access the instance for the current work-item
75  T &operator()(const h_item<Dimensions> &Id) {
76 #ifndef __SYCL_DEVICE_ONLY__
77  // Calculate the linear index of current WI and return reference to the
78  // corresponding spot in the value array:
79  size_t Ind = Id.get_physical_local().get_linear_id();
80  return Val.get()[Ind];
81 #else
82  (void)Id;
83  return Val;
84 #endif // __SYCL_DEVICE_ONLY__
85  }
86 
87 private:
88 #ifdef __SYCL_DEVICE_ONLY__
89  // On SYCL device private_memory<T> instance is created per physical WI, so
90  // there is 1:1 correspondence betwen this class instances and per-WI memory.
91  T Val;
92 #else
93  // On serial host there is one private_memory<T> instance per work group, so
94  // it must have space to hold separate value per WI in the group.
95  std::unique_ptr<T[]> Val;
96 #endif // #ifdef __SYCL_DEVICE_ONLY__
97 };
98 
103 template <int Dimensions = 1> class __SYCL_TYPE(group) group {
104 public:
105 #ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
106  using id_type = id<Dimensions>;
107  using range_type = range<Dimensions>;
108  using linear_id_type = size_t;
109  static constexpr int dimensions = Dimensions;
110 #endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
111 
112  static constexpr sycl::memory_scope fence_scope =
113  sycl::memory_scope::work_group;
114 
115  group() = delete;
116 
117  __SYCL2020_DEPRECATED("use sycl::group::get_group_id() instead")
118  id<Dimensions> get_id() const { return index; }
119 
120  __SYCL2020_DEPRECATED("use sycl::group::get_group_id() instead")
121  size_t get_id(int dimension) const { return index[dimension]; }
122 
123  id<Dimensions> get_group_id() const { return index; }
124 
125  size_t get_group_id(int dimension) const { return index[dimension]; }
126 
127  __SYCL2020_DEPRECATED("calculate sycl::group::get_group_range() * "
128  "sycl::group::get_max_local_range() instead")
129  range<Dimensions> get_global_range() const { return globalRange; }
130 
131  size_t get_global_range(int dimension) const {
132  return globalRange[dimension];
133  }
134 
135  id<Dimensions> get_local_id() const {
136 #ifdef __SYCL_DEVICE_ONLY__
137  return __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>();
138 #else
140  "get_local_id() is not implemented on host");
141 #endif
142  }
143 
144  size_t get_local_id(int dimention) const { return get_local_id()[dimention]; }
145 
146  size_t get_local_linear_id() const {
147  return get_local_linear_id_impl<Dimensions>();
148  }
149 
150  range<Dimensions> get_local_range() const { return localRange; }
151 
152  size_t get_local_range(int dimension) const { return localRange[dimension]; }
153 
154  size_t get_local_linear_range() const {
155  return get_local_linear_range_impl();
156  }
157 
158  range<Dimensions> get_group_range() const { return groupRange; }
159 
160  size_t get_group_range(int dimension) const {
161  return get_group_range()[dimension];
162  }
163 
164  size_t get_group_linear_range() const {
165  return get_group_linear_range_impl();
166  }
167 
168  range<Dimensions> get_max_local_range() const { return get_local_range(); }
169 
170  size_t operator[](int dimension) const { return index[dimension]; }
171 
172  __SYCL2020_DEPRECATED("use sycl::group::get_group_linear_id() instead")
173  size_t get_linear_id() const { return get_group_linear_id(); }
174 
175  size_t get_group_linear_id() const { return get_group_linear_id_impl(); }
176 
177  bool leader() const { return (get_local_linear_id() == 0); }
178 
179  template <typename WorkItemFunctionT>
180  void parallel_for_work_item(WorkItemFunctionT Func) const {
181  // need barriers to enforce SYCL semantics for the work item loop -
182  // compilers are expected to optimize when possible
184 #ifdef __SYCL_DEVICE_ONLY__
185  range<Dimensions> GlobalSize{
186  __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
187  range<Dimensions> LocalSize{
188  __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
189  id<Dimensions> GlobalId{
190  __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
191  id<Dimensions> LocalId{
192  __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
193 
194  // no 'iterate' in the device code variant, because
195  // (1) this code is already invoked by each work item as a part of the
196  // enclosing parallel_for_work_group kernel
197  // (2) the range this pfwi iterates over matches work group size exactly
198  item<Dimensions, false> GlobalItem =
199  detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
200  item<Dimensions, false> LocalItem =
201  detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
202  h_item<Dimensions> HItem =
203  detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
204 
205  Func(HItem);
206 #else
207  id<Dimensions> GroupStartID = index * localRange;
208 
209  // ... host variant needs explicit 'iterate' because it is serial
211  localRange, [&](const id<Dimensions> &LocalID) {
212  item<Dimensions, false> GlobalItem =
213  detail::Builder::createItem<Dimensions, false>(
214  globalRange, GroupStartID + LocalID);
215  item<Dimensions, false> LocalItem =
216  detail::Builder::createItem<Dimensions, false>(localRange,
217  LocalID);
218  h_item<Dimensions> HItem =
219  detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
220  Func(HItem);
221  });
222 #endif // __SYCL_DEVICE_ONLY__
223  // Need both barriers here - before and after the parallel_for_work_item
224  // (PFWI). There can be work group scope code after the PFWI which reads
225  // work group local data written within this PFWI. Back Ends are expected to
226  // optimize away unneeded barriers (e.g. two barriers in a row).
228  }
229 
230  template <typename WorkItemFunctionT>
231  void parallel_for_work_item(range<Dimensions> flexibleRange,
232  WorkItemFunctionT Func) const {
234 #ifdef __SYCL_DEVICE_ONLY__
235  range<Dimensions> GlobalSize{
236  __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
237  range<Dimensions> LocalSize{
238  __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
239  id<Dimensions> GlobalId{
240  __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
241  id<Dimensions> LocalId{
242  __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
243 
244  item<Dimensions, false> GlobalItem =
245  detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
246  item<Dimensions, false> LocalItem =
247  detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
248  h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
249  GlobalItem, LocalItem, flexibleRange);
250 
251  // iterate over flexible range with work group size stride; each item
252  // performs flexibleRange/LocalSize iterations (if the former is divisible
253  // by the latter)
255  LocalId, LocalSize, flexibleRange,
256  [&](const id<Dimensions> &LogicalLocalID) {
257  HItem.setLogicalLocalID(LogicalLocalID);
258  Func(HItem);
259  });
260 #else
261  id<Dimensions> GroupStartID = index * localRange;
262 
264  localRange, [&](const id<Dimensions> &LocalID) {
265  item<Dimensions, false> GlobalItem =
266  detail::Builder::createItem<Dimensions, false>(
267  globalRange, GroupStartID + LocalID);
268  item<Dimensions, false> LocalItem =
269  detail::Builder::createItem<Dimensions, false>(localRange,
270  LocalID);
271  h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
272  GlobalItem, LocalItem, flexibleRange);
273 
275  LocalID, localRange, flexibleRange,
276  [&](const id<Dimensions> &LogicalLocalID) {
277  HItem.setLogicalLocalID(LogicalLocalID);
278  Func(HItem);
279  });
280  });
281 #endif // __SYCL_DEVICE_ONLY__
283  }
284 
287  template <access::mode accessMode = access::mode::read_write>
288  void mem_fence(
289  typename std::enable_if_t<accessMode == access::mode::read ||
290  accessMode == access::mode::write ||
291  accessMode == access::mode::read_write,
293  accessSpace = access::fence_space::global_and_local) const {
294  uint32_t flags = detail::getSPIRVMemorySemanticsMask(accessSpace);
295  // TODO: currently, there is no good way in SPIR-V to set the memory
296  // barrier only for load operations or only for store operations.
297  // The full read-and-write barrier is used and the template parameter
298  // 'accessMode' is ignored for now. Either SPIR-V or SYCL spec may be
299  // changed to address this discrepancy between SPIR-V and SYCL,
300  // or if we decide that 'accessMode' is the important feature then
301  // we can fix this later, for example, by using OpenCL 1.2 functions
302  // read_mem_fence() and write_mem_fence().
304  }
305 
311  template <typename dataT>
312  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
313  std::enable_if_t<!detail::is_bool<dataT>::value,
314  device_event> async_work_group_copy(local_ptr<dataT> dest,
315  global_ptr<dataT> src,
316  size_t numElements,
317  size_t srcStride) const {
318  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
319  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
320 
322  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
323  numElements, srcStride, 0);
324  return device_event(E);
325  }
326 
332  template <typename dataT>
333  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
334  std::enable_if_t<!detail::is_bool<dataT>::value,
335  device_event> async_work_group_copy(global_ptr<dataT> dest,
336  local_ptr<dataT> src,
337  size_t numElements,
338  size_t destStride)
339  const {
340  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
341  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
342 
344  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
345  numElements, destStride, 0);
346  return device_event(E);
347  }
348 
355  template <typename DestDataT, typename SrcDataT>
356  std::enable_if_t<!detail::is_bool<DestDataT>::value &&
357  std::is_same_v<std::remove_const_t<SrcDataT>, DestDataT>,
358  device_event>
359  async_work_group_copy(decorated_local_ptr<DestDataT> dest,
360  decorated_global_ptr<SrcDataT> src, size_t numElements,
361  size_t srcStride) const {
362  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
363  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
364 
366  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
367  numElements, srcStride, 0);
368  return device_event(E);
369  }
370 
377  template <typename DestDataT, typename SrcDataT>
378  std::enable_if_t<!detail::is_bool<DestDataT>::value &&
379  std::is_same_v<std::remove_const_t<SrcDataT>, DestDataT>,
380  device_event>
381  async_work_group_copy(decorated_global_ptr<DestDataT> dest,
382  decorated_local_ptr<SrcDataT> src, size_t numElements,
383  size_t destStride) const {
384  using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
385  using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
386 
388  __spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
389  numElements, destStride, 0);
390  return device_event(E);
391  }
392 
398  template <typename T, access::address_space DestS, access::address_space SrcS>
399  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
400  std::enable_if_t<
402  device_event> async_work_group_copy(multi_ptr<T, DestS,
403  access::decorated::legacy>
404  Dest,
405  multi_ptr<T, SrcS,
406  access::decorated::legacy>
407  Src,
408  size_t NumElements,
409  size_t Stride) const {
410  static_assert(sizeof(bool) == sizeof(uint8_t),
411  "Async copy to/from bool memory is not supported.");
413  reinterpret_cast<uint8_t *>(Dest.get()));
415  reinterpret_cast<uint8_t *>(Src.get()));
416  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
417  }
418 
424  template <typename T, access::address_space DestS, access::address_space SrcS>
425  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
426  std::enable_if_t<
428  device_event> async_work_group_copy(multi_ptr<T, DestS,
429  access::decorated::legacy>
430  Dest,
431  multi_ptr<T, SrcS,
432  access::decorated::legacy>
433  Src,
434  size_t NumElements,
435  size_t Stride) const {
436  static_assert(sizeof(bool) == sizeof(uint8_t),
437  "Async copy to/from bool memory is not supported.");
439  auto DestP = address_space_cast<DestS, access::decorated::legacy>(
440  reinterpret_cast<VecT *>(Dest.get()));
441  auto SrcP = address_space_cast<SrcS, access::decorated::legacy>(
442  reinterpret_cast<VecT *>(Src.get()));
443  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
444  }
445 
451  template <typename DestT, access::address_space DestS, typename SrcT,
453  std::enable_if_t<detail::is_scalar_bool<DestT>::value &&
454  std::is_same_v<std::remove_const_t<SrcT>, DestT>,
455  device_event>
456  async_work_group_copy(multi_ptr<DestT, DestS, access::decorated::yes> Dest,
458  size_t NumElements, size_t Stride) const {
459  static_assert(sizeof(bool) == sizeof(uint8_t),
460  "Async copy to/from bool memory is not supported.");
461  using QualSrcT =
462  std::conditional_t<std::is_const_v<SrcT>, const uint8_t, uint8_t>;
464  detail::cast_AS<typename multi_ptr<uint8_t, DestS,
465  access::decorated::yes>::pointer>(
466  Dest.get_decorated()));
468  detail::cast_AS<typename multi_ptr<QualSrcT, SrcS,
469  access::decorated::yes>::pointer>(
470  Src.get_decorated()));
471  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
472  }
473 
479  template <typename DestT, access::address_space DestS, typename SrcT,
481  std::enable_if_t<detail::is_vector_bool<DestT>::value &&
482  std::is_same_v<std::remove_const_t<SrcT>, DestT>,
483  device_event>
484  async_work_group_copy(multi_ptr<DestT, DestS, access::decorated::yes> Dest,
486  size_t NumElements, size_t Stride) const {
487  static_assert(sizeof(bool) == sizeof(uint8_t),
488  "Async copy to/from bool memory is not supported.");
490  using QualSrcVecT =
491  std::conditional_t<std::is_const_v<SrcT>, std::add_const_t<VecT>, VecT>;
495  Dest.get_decorated()));
497  detail::cast_AS<typename multi_ptr<QualSrcVecT, SrcS,
498  access::decorated::yes>::pointer>(
499  Src.get_decorated()));
500  return async_work_group_copy(DestP, SrcP, NumElements, Stride);
501  }
502 
508  template <typename dataT>
509  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
511  async_work_group_copy(local_ptr<dataT> dest, global_ptr<dataT> src,
512  size_t numElements) const {
513  return async_work_group_copy(dest, src, numElements, 1);
514  }
515 
521  template <typename dataT>
522  __SYCL2020_DEPRECATED("Use decorated multi_ptr arguments instead")
524  async_work_group_copy(global_ptr<dataT> dest, local_ptr<dataT> src,
525  size_t numElements) const {
526  return async_work_group_copy(dest, src, numElements, 1);
527  }
528 
535  template <typename DestDataT, typename SrcDataT>
536  typename std::enable_if_t<
537  std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>, device_event>
538  async_work_group_copy(decorated_local_ptr<DestDataT> dest,
540  size_t numElements) const {
541  return async_work_group_copy(dest, src, numElements, 1);
542  }
543 
550  template <typename DestDataT, typename SrcDataT>
551  typename std::enable_if_t<
552  std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>, device_event>
553  async_work_group_copy(decorated_global_ptr<DestDataT> dest,
555  size_t numElements) const {
556  return async_work_group_copy(dest, src, numElements, 1);
557  }
558 
559  template <typename... eventTN> void wait_for(eventTN... Events) const {
560  waitForHelper(Events...);
561  }
562 
563  bool operator==(const group<Dimensions> &rhs) const {
564  bool Result = (rhs.globalRange == globalRange) &&
565  (rhs.localRange == localRange) && (rhs.index == index);
566  __SYCL_ASSERT(rhs.groupRange == groupRange &&
567  "inconsistent group class fields");
568  return Result;
569  }
570 
571  bool operator!=(const group<Dimensions> &rhs) const {
572  return !((*this) == rhs);
573  }
574 
575 private:
576  range<Dimensions> globalRange;
577  range<Dimensions> localRange;
578  range<Dimensions> groupRange;
579  id<Dimensions> index;
580 
581  template <int dims = Dimensions>
582  typename std::enable_if_t<(dims == 1), size_t>
583  get_local_linear_id_impl() const {
584  id<Dimensions> localId = get_local_id();
585  return localId[0];
586  }
587 
588  template <int dims = Dimensions>
589  typename std::enable_if_t<(dims == 2), size_t>
590  get_local_linear_id_impl() const {
591  id<Dimensions> localId = get_local_id();
592  return localId[0] * localRange[1] + localId[1];
593  }
594 
595  template <int dims = Dimensions>
596  typename std::enable_if_t<(dims == 3), size_t>
597  get_local_linear_id_impl() const {
598  id<Dimensions> localId = get_local_id();
599  return (localId[0] * localRange[1] * localRange[2]) +
600  (localId[1] * localRange[2]) + localId[2];
601  }
602 
603  template <int dims = Dimensions>
604  typename std::enable_if_t<(dims == 1), size_t>
605  get_local_linear_range_impl() const {
606  auto localRange = get_local_range();
607  return localRange[0];
608  }
609 
610  template <int dims = Dimensions>
611  typename std::enable_if_t<(dims == 2), size_t>
612  get_local_linear_range_impl() const {
613  auto localRange = get_local_range();
614  return localRange[0] * localRange[1];
615  }
616 
617  template <int dims = Dimensions>
618  typename std::enable_if_t<(dims == 3), size_t>
619  get_local_linear_range_impl() const {
620  auto localRange = get_local_range();
621  return localRange[0] * localRange[1] * localRange[2];
622  }
623 
624  template <int dims = Dimensions>
625  typename std::enable_if_t<(dims == 1), size_t>
626  get_group_linear_range_impl() const {
627  auto groupRange = get_group_range();
628  return groupRange[0];
629  }
630 
631  template <int dims = Dimensions>
632  typename std::enable_if_t<(dims == 2), size_t>
633  get_group_linear_range_impl() const {
634  auto groupRange = get_group_range();
635  return groupRange[0] * groupRange[1];
636  }
637 
638  template <int dims = Dimensions>
639  typename std::enable_if_t<(dims == 3), size_t>
640  get_group_linear_range_impl() const {
641  auto groupRange = get_group_range();
642  return groupRange[0] * groupRange[1] * groupRange[2];
643  }
644 
645  template <int dims = Dimensions>
646  typename std::enable_if_t<(dims == 1), size_t>
647  get_group_linear_id_impl() const {
648  return index[0];
649  }
650 
651  template <int dims = Dimensions>
652  typename std::enable_if_t<(dims == 2), size_t>
653  get_group_linear_id_impl() const {
654  return index[0] * groupRange[1] + index[1];
655  }
656 
657  // SYCL specification 1.2.1rev5, section 4.7.6.5 "Buffer accessor":
658  // Whenever a multi-dimensional index is passed to a SYCL accessor the
659  // linear index is calculated based on the index {id1, id2, id3} provided
660  // and the range of the SYCL accessor {r1, r2, r3} according to row-major
661  // ordering as follows:
662  // id3 + (id2 · r3) + (id1 · r3 · r2) (4.3)
663  // section 4.8.1.8 "group class":
664  // size_t get_linear_id()const
665  // Get a linearized version of the work-group id. Calculating a linear
666  // work-group id from a multi-dimensional index follows the equation 4.3.
667  template <int dims = Dimensions>
668  typename std::enable_if_t<(dims == 3), size_t>
669  get_group_linear_id_impl() const {
670  return (index[0] * groupRange[1] * groupRange[2]) +
671  (index[1] * groupRange[2]) + index[2];
672  }
673 
674  void waitForHelper() const {}
675 
676  void waitForHelper(device_event Event) const { Event.wait(); }
677 
678  template <typename T, typename... Ts>
679  void waitForHelper(T E, Ts... Es) const {
680  waitForHelper(E);
681  waitForHelper(Es...);
682  }
683 
684 protected:
685  friend class detail::Builder;
686  group(const range<Dimensions> &G, const range<Dimensions> &L,
687  const range<Dimensions> GroupRange, const id<Dimensions> &I)
688  : globalRange(G), localRange(L), groupRange(GroupRange), index(I) {
689  // Make sure local range divides global without remainder:
690  __SYCL_ASSERT(((G % L).size() == 0) &&
691  "global range is not multiple of local");
692  __SYCL_ASSERT((((G / L) - GroupRange).size() == 0) &&
693  "inconsistent group constructor arguments");
694  }
695 };
696 
697 template <int Dims>
698 __SYCL_DEPRECATED("use sycl::ext::oneapi::experimental::this_group() instead")
699 group<Dims> this_group() {
700 #ifdef __SYCL_DEVICE_ONLY__
701  return detail::Builder::getElement(detail::declptr<group<Dims>>());
702 #else
703  throw sycl::exception(
704  sycl::make_error_code(sycl::errc::feature_not_supported),
705  "Free function calls are not supported on host");
706 #endif
707 }
708 
709 namespace ext::oneapi::experimental {
710 template <int Dims> group<Dims> this_group() {
711 #ifdef __SYCL_DEVICE_ONLY__
712  return sycl::detail::Builder::getElement(
714 #else
715  throw sycl::exception(
716  sycl::make_error_code(sycl::errc::feature_not_supported),
717  "Free function calls are not supported on host");
718 #endif
719 }
720 } // namespace ext::oneapi::experimental
721 } // namespace _V1
722 } // namespace sycl
spirv_ops.hpp
sycl::_V1::h_item::get_physical_local
item< Dimensions, false > get_physical_local() const
Definition: h_item.hpp:48
sycl::_V1::__SYCL2020_DEPRECATED
signed char __SYCL2020_DEPRECATED
Definition: aliases.hpp:94
sycl::_V1::detail::Builder
Definition: helpers.hpp:68
sycl::_V1::__SYCL_DEPRECATED
__SYCL_DEPRECATED("abs for floating point types is non-standard and has been " "deprecated. Please use fabs instead.") std
Definition: builtins_legacy_scalar.hpp:470
sycl::_V1::detail::ConvertToOpenCLType_t
typename ConvertToOpenCLTypeImpl< SelectMatchingOpenCLType_t< T > >::type ConvertToOpenCLType_t
Definition: generic_type_traits.hpp:679
sycl::_V1::device_event
Encapsulates a single SYCL device event which is available only within SYCL kernel functions and can ...
Definition: device_event.hpp:22
sycl::_V1::detail::NDLoop::iterate
static __SYCL_ALWAYS_INLINE void iterate(const LoopBoundTy< NDims > &UpperBound, FuncTy f)
Generates ND loop nest with {0,..0} .
Definition: common.hpp:325
sycl::_V1::detail::is_vector_bool
Definition: type_traits.hpp:311
__spv::Scope::Workgroup
@ Workgroup
Definition: spirv_types.hpp:31
type_traits.hpp
sycl::_V1::make_error_code
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:94
item.hpp
__spirv_ControlBarrier
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
sycl::_V1::detail::declptr
T * declptr()
Definition: helpers.hpp:56
sycl::_V1::errc::feature_not_supported
@ feature_not_supported
sycl::_V1::h_item::setLogicalLocalID
void setLogicalLocalID(const id< Dimensions > &ID)
Definition: h_item.hpp:129
__SYCL_OpGroupAsyncCopyGlobalToLocal
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1136
__SYCL_ASSERT
#define __SYCL_ASSERT(x)
Definition: common.hpp:166
sycl::_V1::Dimensions
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
Definition: accessor.hpp:3235
helpers.hpp
sycl
Definition: access.hpp:18
sycl::_V1::device_event::wait
void wait()
Definition: device_event.hpp:34
__spirv_MemoryBarrier
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:36
__ocl_event_t
void * __ocl_event_t
Definition: spirv_types.hpp:163
access.hpp
__SYCL_OpGroupAsyncCopyLocalToGlobal
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
Definition: spirv_ops.hpp:1148
sycl::_V1::id
A unique identifier of an item in an index space.
Definition: array.hpp:20
sycl::_V1::access::fence_space
fence_space
Definition: access.hpp:43
id.hpp
sycl::_V1::detail::is_scalar_bool
Definition: type_traits.hpp:307
sycl::_V1::multi_ptr::get
pointer get() const
Definition: multi_ptr.hpp:293
sycl::_V1::range
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: buffer.hpp:52
sycl::_V1::multi_ptr::get_decorated
decorated_type * get_decorated() const
Definition: multi_ptr.hpp:294
sycl::_V1::multi_ptr
Provides constructors for address space qualified and non address space qualified pointers to allow i...
Definition: atomic.hpp:34
defines_elementary.hpp
sycl::_V1::detail::change_base_type_t
typename change_base_type< T, B >::type change_base_type_t
Definition: type_traits.hpp:185
sycl::_V1::detail::get_local_linear_range
size_t get_local_linear_range(Group g)
sycl::_V1::item< Dimensions, false >
generic_type_traits.hpp
range.hpp
multi_ptr.hpp
defines.hpp
sycl::_V1::exception
Definition: exception.hpp:68
common.hpp
sycl::_V1::h_item
Identifies an instance of a group::parallel_for_work_item function object executing at each point in ...
Definition: helpers.hpp:37
sycl::_V1::access::fence_space::global_and_local
@ global_and_local
sycl::_V1::operator[]
ReturnRef operator[](difference_type index)
Definition: multi_ptr.hpp:840
sycl::_V1::multi_ptr::pointer
std::conditional_t< is_decorated, decorated_type *, std::add_pointer_t< value_type > > pointer
Definition: multi_ptr.hpp:95
__spv::MemorySemanticsMask::SequentiallyConsistent
@ SequentiallyConsistent
Definition: spirv_types.hpp:90
sycl::_V1::detail::workGroupBarrier
void workGroupBarrier()
Definition: group.hpp:42
sycl::_V1::detail::get_local_linear_id
Group::linear_id_type get_local_linear_id(Group g)
__spv::MemorySemanticsMask::WorkgroupMemory
@ WorkgroupMemory
Definition: spirv_types.hpp:93
sycl::_V1::memory_scope
memory_scope
Definition: memory_enums.hpp:29
sycl::_V1::this_group
group< Dims > this_group()
Definition: group.hpp:699
exception.hpp
device_event.hpp
item_base.hpp
sycl::_V1::access::decorated::yes
@ yes
sycl::_V1::detail::cast_AS
ToT cast_AS(FromT from)
Definition: access.hpp:325
memory_enums.hpp
sycl::_V1::ext::intel::esimd::fence_scope
fence_scope
The scope that fence() operation should apply to.
Definition: common.hpp:389
sycl::_V1::detail::getSPIRVMemorySemanticsMask
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:198
h_item.hpp
sycl::_V1::item::get_linear_id
size_t __SYCL_ALWAYS_INLINE get_linear_id() const
Definition: item.hpp:104
sycl::_V1::access::mode::read_write
@ read_write
pointers.hpp
sycl::_V1::access::mode::write
@ write
sycl::_V1::group
Definition: helpers.hpp:33
sycl::_V1::access::mode::read
@ read
spirv_types.hpp
sycl::_V1::access::address_space
address_space
Definition: access.hpp:51