DPC++ Runtime
Runtime libraries for oneAPI DPC++
sub_group.hpp
Go to the documentation of this file.
1 //==----------- sub_group.hpp --- SYCL sub-group ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp> // for address_space, decorated
12 #include <sycl/detail/defines_elementary.hpp> // for __SYCL_DEPRECATED
13 #include <sycl/detail/generic_type_traits.hpp> // for select_cl_scalar_inte...
14 #include <sycl/detail/pi.h> // for PI_ERROR_INVALID_DEVICE
15 #include <sycl/detail/type_traits.hpp> // for is_scalar_arithmetic
16 #include <sycl/exception.hpp> // for exception, make_error...
17 #include <sycl/id.hpp> // for id
18 #include <sycl/memory_enums.hpp> // for memory_scope
19 #include <sycl/multi_ptr.hpp> // for multi_ptr
20 #include <sycl/range.hpp> // for range
21 #include <sycl/types.hpp> // for vec
22 
23 #ifdef __SYCL_DEVICE_ONLY__
25 #endif
26 
27 #include <stdint.h> // for uint32_t
28 #include <tuple> // for _Swallow_assign, ignore
29 #include <type_traits> // for enable_if_t, remove_cv_t
30 
31 namespace sycl {
32 inline namespace _V1 {
33 template <typename T, access::address_space Space,
34  access::decorated DecorateAddress>
35 class multi_ptr;
36 
37 namespace detail {
38 
39 namespace sub_group {
40 
41 // Selects 8, 16, 32, or 64-bit type depending on size of scalar type T.
42 template <typename T>
44 
45 template <typename MultiPtrTy> auto convertToBlockPtr(MultiPtrTy MultiPtr) {
46  static_assert(is_multi_ptr_v<MultiPtrTy>);
47  auto DecoratedPtr = convertToOpenCLType(MultiPtr);
48  using DecoratedPtrTy = decltype(DecoratedPtr);
50 
51  using TargetElemTy = SelectBlockT<ElemTy>;
52  // TODO: Handle cv qualifiers.
53 #ifdef __SYCL_DEVICE_ONLY__
54  using ResultTy =
55  typename DecoratedType<TargetElemTy,
56  deduce_AS<DecoratedPtrTy>::value>::type *;
57 #else
58  using ResultTy = TargetElemTy *;
59 #endif
60  return reinterpret_cast<ResultTy>(DecoratedPtr);
61 }
62 
63 template <typename T, access::address_space Space>
65  std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
67 
68 template <typename T, access::address_space Space>
70  std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
72 
73 #ifdef __SYCL_DEVICE_ONLY__
74 template <typename T, access::address_space Space,
75  access::decorated DecorateAddress>
76 T load(const multi_ptr<T, Space, DecorateAddress> src) {
77  using BlockT = SelectBlockT<T>;
78  BlockT Ret = __spirv_SubgroupBlockReadINTEL<BlockT>(convertToBlockPtr(src));
79 
80  return sycl::bit_cast<T>(Ret);
81 }
82 
83 template <int N, typename T, access::address_space Space,
84  access::decorated DecorateAddress>
86  using BlockT = SelectBlockT<T>;
87  using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
88  VecT Ret = __spirv_SubgroupBlockReadINTEL<VecT>(convertToBlockPtr(src));
89 
90  return sycl::bit_cast<typename vec<T, N>::vector_t>(Ret);
91 }
92 
93 template <typename T, access::address_space Space,
94  access::decorated DecorateAddress>
95 void store(multi_ptr<T, Space, DecorateAddress> dst, const T &x) {
96  using BlockT = SelectBlockT<T>;
97 
98  __spirv_SubgroupBlockWriteINTEL(convertToBlockPtr(dst),
99  sycl::bit_cast<BlockT>(x));
100 }
101 
102 template <int N, typename T, access::address_space Space,
103  access::decorated DecorateAddress>
104 void store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) {
105  using BlockT = SelectBlockT<T>;
106  using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
107 
108  __spirv_SubgroupBlockWriteINTEL(convertToBlockPtr(dst),
109  sycl::bit_cast<VecT>(x));
110 }
111 #endif // __SYCL_DEVICE_ONLY__
112 
113 } // namespace sub_group
114 
115 // Helper for removing const and volatile qualifiers from the element type of
116 // a multi_ptr.
117 template <typename CVT, access::address_space Space,
118  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
119 inline multi_ptr<T, Space, IsDecorated>
121  if constexpr (IsDecorated == access::decorated::legacy) {
123  const_cast<typename multi_ptr<T, Space, IsDecorated>::pointer_t>(
124  Mptr.get())};
125  } else {
127  const_cast<typename multi_ptr<T, Space, IsDecorated>::pointer>(
128  Mptr.get_decorated())};
129  }
130 }
131 
132 } // namespace detail
133 
134 struct sub_group;
135 namespace ext::oneapi {
137 namespace experimental {
139 } // namespace experimental
140 } // namespace ext::oneapi
141 
142 struct sub_group {
143 
144  using id_type = id<1>;
146  using linear_id_type = uint32_t;
147  static constexpr int dimensions = 1;
148  static constexpr sycl::memory_scope fence_scope =
150 
151  /* --- common interface members --- */
152 
154 #ifdef __SYCL_DEVICE_ONLY__
155  return __spirv_SubgroupLocalInvocationId();
156 #else
158  "Sub-groups are not supported on host.");
159 #endif
160  }
161 
163 #ifdef __SYCL_DEVICE_ONLY__
164  return static_cast<linear_id_type>(get_local_id()[0]);
165 #else
167  "Sub-groups are not supported on host.");
168 #endif
169  }
170 
172 #ifdef __SYCL_DEVICE_ONLY__
173  return __spirv_SubgroupSize();
174 #else
176  "Sub-groups are not supported on host.");
177 #endif
178  }
179 
181 #ifdef __SYCL_DEVICE_ONLY__
182  return __spirv_SubgroupMaxSize();
183 #else
185  "Sub-groups are not supported on host.");
186 #endif
187  }
188 
190 #ifdef __SYCL_DEVICE_ONLY__
191  return __spirv_SubgroupId();
192 #else
194  "Sub-groups are not supported on host.");
195 #endif
196  }
197 
199 #ifdef __SYCL_DEVICE_ONLY__
200  return static_cast<linear_id_type>(get_group_id()[0]);
201 #else
203  "Sub-groups are not supported on host.");
204 #endif
205  }
206 
208 #ifdef __SYCL_DEVICE_ONLY__
209  return __spirv_NumSubgroups();
210 #else
212  "Sub-groups are not supported on host.");
213 #endif
214  }
215 
216  template <typename T>
218  std::enable_if_t<sycl::detail::is_scalar_arithmetic<T>::value, T>;
219 
220  /* --- one-input shuffles --- */
221  /* indices in [0 , sub_group size) */
222 
223  template <typename T> T shuffle(T x, id_type local_id) const {
224 #ifdef __SYCL_DEVICE_ONLY__
225  return sycl::detail::spirv::SubgroupShuffle(x, local_id);
226 #else
227  (void)x;
228  (void)local_id;
230  "Sub-groups are not supported on host.");
231 #endif
232  }
233 
234  template <typename T> T shuffle_down(T x, uint32_t delta) const {
235 #ifdef __SYCL_DEVICE_ONLY__
236  return sycl::detail::spirv::SubgroupShuffleDown(x, delta);
237 #else
238  (void)x;
239  (void)delta;
241  "Sub-groups are not supported on host.");
242 #endif
243  }
244 
245  template <typename T> T shuffle_up(T x, uint32_t delta) const {
246 #ifdef __SYCL_DEVICE_ONLY__
247  return sycl::detail::spirv::SubgroupShuffleUp(x, delta);
248 #else
249  (void)x;
250  (void)delta;
252  "Sub-groups are not supported on host.");
253 #endif
254  }
255 
256  template <typename T> T shuffle_xor(T x, id_type value) const {
257 #ifdef __SYCL_DEVICE_ONLY__
258  return sycl::detail::spirv::SubgroupShuffleXor(x, value);
259 #else
260  (void)x;
261  (void)value;
263  "Sub-groups are not supported on host.");
264 #endif
265  }
266 
267  /* --- sub_group load/stores --- */
268  /* these can map to SIMD or block read/write hardware where available */
269 #ifdef __SYCL_DEVICE_ONLY__
270  // Method for decorated pointer
271  template <typename CVT, typename T = std::remove_cv_t<CVT>>
272  std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value, T>
273  load(CVT *cv_src) const {
274  T *src = const_cast<T *>(cv_src);
276  sycl::detail::deduce_AS<T>::value,
277  sycl::access::decorated::yes>(src));
278  }
279 
280  // Method for raw pointer
281  template <typename CVT, typename T = std::remove_cv_t<CVT>>
282  std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value, T>
283  load(CVT *cv_src) const {
284  T *src = const_cast<T *>(cv_src);
285 
286 #if defined(__NVPTX__) || defined(__AMDGCN__)
287  return src[get_local_id()[0]];
288 #else // __NVPTX__ || __AMDGCN__
289  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(src);
290  if (l)
291  return load(l);
292 
293  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(src);
294  if (g)
295  return load(g);
296 
297  assert(!"Sub-group load() is supported for local or global pointers only.");
298  return {};
299 #endif // __NVPTX__ || __AMDGCN__
300  }
301 #else //__SYCL_DEVICE_ONLY__
302  template <typename CVT, typename T = std::remove_cv_t<CVT>>
303  T load(CVT *src) const {
304  (void)src;
306  "Sub-groups are not supported on host.");
307  }
308 #endif //__SYCL_DEVICE_ONLY__
309 
310  template <typename CVT, access::address_space Space,
311  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
312  std::enable_if_t<
317 #ifdef __SYCL_DEVICE_ONLY__
318 #if defined(__NVPTX__) || defined(__AMDGCN__)
319  return src.get()[get_local_id()[0]];
320 #else
321  return sycl::detail::sub_group::load(src);
322 #endif // __NVPTX__ || __AMDGCN__
323 #else
324  (void)src;
326  "Sub-groups are not supported on host.");
327 #endif // __SYCL_DEVICE_ONLY__
328  }
329 
330  template <typename CVT, access::address_space Space,
331  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
332  std::enable_if_t<
337 #ifdef __SYCL_DEVICE_ONLY__
338  return src.get()[get_local_id()[0]];
339 #else
340  (void)src;
342  "Sub-groups are not supported on host.");
343 #endif
344  }
345 #ifdef __SYCL_DEVICE_ONLY__
346 #if defined(__NVPTX__) || defined(__AMDGCN__)
347  template <int N, typename CVT, access::address_space Space,
348  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
349  std::enable_if_t<
351  vec<T, N>>
352  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
355  vec<T, N> res;
356  for (int i = 0; i < N; ++i) {
357  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
358  }
359  return res;
360  }
361 #else // __NVPTX__ || __AMDGCN__
362  template <int N, typename CVT, access::address_space Space,
363  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
364  std::enable_if_t<
366  N != 1 && N != 3 && N != 16,
367  vec<T, N>>
368  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
369  multi_ptr<T, Space, IsDecorated> src =
371  return sycl::detail::sub_group::load<N, T>(src);
372  }
373 
374  template <int N, typename CVT, access::address_space Space,
375  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
376  std::enable_if_t<
378  N == 16,
379  vec<T, 16>>
380  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
381  multi_ptr<T, Space, IsDecorated> src =
383  return {sycl::detail::sub_group::load<8, T>(src),
384  sycl::detail::sub_group::load<8, T>(src +
385  8 * get_max_local_range()[0])};
386  }
387 
388  template <int N, typename CVT, access::address_space Space,
389  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
390  std::enable_if_t<
392  N == 3,
393  vec<T, 3>>
394  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
395  multi_ptr<T, Space, IsDecorated> src =
397  return {
398  sycl::detail::sub_group::load<1, T>(src),
399  sycl::detail::sub_group::load<2, T>(src + get_max_local_range()[0])};
400  }
401 
402  template <int N, typename CVT, access::address_space Space,
403  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
404  std::enable_if_t<
406  N == 1,
407  vec<T, 1>>
408  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
409  multi_ptr<T, Space, IsDecorated> src =
411  return sycl::detail::sub_group::load(src);
412  }
413 #endif // ___NVPTX___
414 #else // __SYCL_DEVICE_ONLY__
415  template <int N, typename CVT, access::address_space Space,
416  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
417  std::enable_if_t<
419  vec<T, N>>
421  (void)src;
423  "Sub-groups are not supported on host.");
424  }
425 #endif // __SYCL_DEVICE_ONLY__
426 
427  template <int N, typename CVT, access::address_space Space,
428  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
429  std::enable_if_t<
431  vec<T, N>>
435 #ifdef __SYCL_DEVICE_ONLY__
436  vec<T, N> res;
437  for (int i = 0; i < N; ++i) {
438  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
439  }
440  return res;
441 #else
442  (void)src;
444  "Sub-groups are not supported on host.");
445 #endif
446  }
447 
448 #ifdef __SYCL_DEVICE_ONLY__
449  // Method for decorated pointer
450  template <typename T>
451  std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value>
452  store(T *dst, const remove_decoration_t<T> &x) const {
454  sycl::detail::deduce_AS<T>::value,
455  sycl::access::decorated::yes>(dst),
456  x);
457  }
458 
459  // Method for raw pointer
460  template <typename T>
461  std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value>
462  store(T *dst, const remove_decoration_t<T> &x) const {
463 
464 #if defined(__NVPTX__) || defined(__AMDGCN__)
465  dst[get_local_id()[0]] = x;
466 #else // __NVPTX__ || __AMDGCN__
467  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(dst);
468  if (l) {
469  store(l, x);
470  return;
471  }
472 
473  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(dst);
474  if (g) {
475  store(g, x);
476  return;
477  }
478 
479  assert(
480  !"Sub-group store() is supported for local or global pointers only.");
481  return;
482 #endif // __NVPTX__ || __AMDGCN__
483  }
484 #else //__SYCL_DEVICE_ONLY__
485  template <typename T> void store(T *dst, const T &x) const {
486  (void)dst;
487  (void)x;
489  "Sub-groups are not supported on host.");
490  }
491 #endif //__SYCL_DEVICE_ONLY__
492 
493  template <typename T, access::address_space Space,
494  access::decorated DecorateAddress>
495  std::enable_if_t<
498 #ifdef __SYCL_DEVICE_ONLY__
499 #if defined(__NVPTX__) || defined(__AMDGCN__)
500  dst.get()[get_local_id()[0]] = x;
501 #else
502  sycl::detail::sub_group::store(dst, x);
503 #endif // __NVPTX__ || __AMDGCN__
504 #else
505  (void)dst;
506  (void)x;
508  "Sub-groups are not supported on host.");
509 #endif
510  }
511 
512  template <typename T, access::address_space Space,
513  access::decorated DecorateAddress>
514  std::enable_if_t<
517 #ifdef __SYCL_DEVICE_ONLY__
518  dst.get()[get_local_id()[0]] = x;
519 #else
520  (void)dst;
521  (void)x;
523  "Sub-groups are not supported on host.");
524 #endif
525  }
526 
527 #ifdef __SYCL_DEVICE_ONLY__
528 #if defined(__NVPTX__) || defined(__AMDGCN__)
529  template <int N, typename T, access::address_space Space,
530  access::decorated DecorateAddress>
531  std::enable_if_t<
534  for (int i = 0; i < N; ++i) {
535  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
536  }
537  }
538 #else // __NVPTX__ || __AMDGCN__
539  template <int N, typename T, access::address_space Space,
540  access::decorated DecorateAddress>
541  std::enable_if_t<
543  N != 1 && N != 3 && N != 16>
544  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) const {
545  sycl::detail::sub_group::store(dst, x);
546  }
547 
548  template <int N, typename T, access::address_space Space,
549  access::decorated DecorateAddress>
550  std::enable_if_t<
552  N == 1>
553  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 1> &x) const {
554  sycl::detail::sub_group::store(dst, x);
555  }
556 
557  template <int N, typename T, access::address_space Space,
558  access::decorated DecorateAddress>
559  std::enable_if_t<
561  N == 3>
562  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 3> &x) const {
563  store<1, T, Space, DecorateAddress>(dst, x.s0());
564  store<2, T, Space, DecorateAddress>(dst + get_max_local_range()[0],
565  {x.s1(), x.s2()});
566  }
567 
568  template <int N, typename T, access::address_space Space,
569  access::decorated DecorateAddress>
570  std::enable_if_t<
572  N == 16>
573  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 16> &x) const {
574  store<8, T, Space, DecorateAddress>(dst, x.lo());
575  store<8, T, Space, DecorateAddress>(dst + 8 * get_max_local_range()[0],
576  x.hi());
577  }
578 
579 #endif // __NVPTX__ || __AMDGCN__
580 #else // __SYCL_DEVICE_ONLY__
581  template <int N, typename T, access::address_space Space,
582  access::decorated DecorateAddress>
583  std::enable_if_t<
586  (void)dst;
587  (void)x;
589  "Sub-groups are not supported on host.");
590  }
591 #endif // __SYCL_DEVICE_ONLY__
592 
593  template <int N, typename T, access::address_space Space,
594  access::decorated DecorateAddress>
595  std::enable_if_t<
598 #ifdef __SYCL_DEVICE_ONLY__
599  for (int i = 0; i < N; ++i) {
600  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
601  }
602 #else
603  (void)dst;
604  (void)x;
606  "Sub-groups are not supported on host.");
607 #endif
608  }
609 
610  /* --- synchronization functions --- */
611  void barrier() const {
612 #ifdef __SYCL_DEVICE_ONLY__
619 #else
621  "Sub-groups are not supported on host.");
622 #endif
623  }
624 
625  __SYCL_DEPRECATED("Sub-group barrier accepting fence_space is deprecated."
626  "Use barrier() without a fence_space instead.")
627  void barrier(access::fence_space accessSpace) const {
628 #ifdef __SYCL_DEVICE_ONLY__
629  int32_t flags = sycl::detail::getSPIRVMemorySemanticsMask(accessSpace);
631  flags);
632 #else
633  (void)accessSpace;
635  "Sub-groups are not supported on host.");
636 #endif
637  }
638 
639  /* --- deprecated collective functions --- */
640  template <typename T>
641  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
642  "sycl::ext::oneapi::broadcast instead.")
643  EnableIfIsScalarArithmetic<T> broadcast(T x, id<1> local_id) const {
644 #ifdef __SYCL_DEVICE_ONLY__
645  return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
646 #else
647  (void)x;
648  (void)local_id;
650  "Sub-groups are not supported on host.");
651 #endif
652  }
653 
654  template <typename T, class BinaryOperation>
655  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
656  "sycl::ext::oneapi::reduce instead.")
657  EnableIfIsScalarArithmetic<T> reduce(T x, BinaryOperation op) const {
658 #ifdef __SYCL_DEVICE_ONLY__
659  return sycl::detail::calc<__spv::GroupOperation::Reduce>(
660  typename sycl::detail::GroupOpTag<T>::type(), *this, x, op);
661 #else
662  (void)x;
663  (void)op;
665  "Sub-groups are not supported on host.");
666 #endif
667  }
668 
669  template <typename T, class BinaryOperation>
670  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
671  "sycl::ext::oneapi::reduce instead.")
672  EnableIfIsScalarArithmetic<T> reduce(T x, T init, BinaryOperation op) const {
673 #ifdef __SYCL_DEVICE_ONLY__
674  return op(init, reduce(x, op));
675 #else
676  (void)x;
677  (void)init;
678  (void)op;
680  "Sub-groups are not supported on host.");
681 #endif
682  }
683 
684  template <typename T, class BinaryOperation>
685  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
686  "sycl::ext::oneapi::exclusive_scan instead.")
687  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, BinaryOperation op) const {
688 #ifdef __SYCL_DEVICE_ONLY__
689  return sycl::detail::calc<__spv::GroupOperation::ExclusiveScan>(
690  typename sycl::detail::GroupOpTag<T>::type(), *this, x, op);
691 #else
692  (void)x;
693  (void)op;
695  "Sub-groups are not supported on host.");
696 #endif
697  }
698 
699  template <typename T, class BinaryOperation>
700  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
701  "sycl::ext::oneapi::exclusive_scan instead.")
702  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, T init,
703  BinaryOperation op) const {
704 #ifdef __SYCL_DEVICE_ONLY__
705  if (get_local_id().get(0) == 0) {
706  x = op(init, x);
707  }
708  T scan = exclusive_scan(x, op);
709  if (get_local_id().get(0) == 0) {
710  scan = init;
711  }
712  return scan;
713 #else
714  (void)x;
715  (void)init;
716  (void)op;
718  "Sub-groups are not supported on host.");
719 #endif
720  }
721 
722  template <typename T, class BinaryOperation>
723  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
724  "sycl::ext::oneapi::inclusive_scan instead.")
725  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op) const {
726 #ifdef __SYCL_DEVICE_ONLY__
727  return sycl::detail::calc<__spv::GroupOperation::InclusiveScan>(
728  typename sycl::detail::GroupOpTag<T>::type(), *this, x, op);
729 #else
730  (void)x;
731  (void)op;
733  "Sub-groups are not supported on host.");
734 #endif
735  }
736 
737  template <typename T, class BinaryOperation>
738  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
739  "sycl::ext::oneapi::inclusive_scan instead.")
740  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op,
741  T init) const {
742 #ifdef __SYCL_DEVICE_ONLY__
743  if (get_local_id().get(0) == 0) {
744  x = op(init, x);
745  }
746  return inclusive_scan(x, op);
747 #else
748  (void)x;
749  (void)op;
750  (void)init;
752  "Sub-groups are not supported on host.");
753 #endif
754  }
755 
756  linear_id_type get_group_linear_range() const {
757 #ifdef __SYCL_DEVICE_ONLY__
758  return static_cast<linear_id_type>(get_group_range()[0]);
759 #else
761  "Sub-groups are not supported on host.");
762 #endif
763  }
764 
766 #ifdef __SYCL_DEVICE_ONLY__
767  return static_cast<linear_id_type>(get_local_range()[0]);
768 #else
770  "Sub-groups are not supported on host.");
771 #endif
772  }
773 
774  bool leader() const {
775 #ifdef __SYCL_DEVICE_ONLY__
776  return get_local_linear_id() == 0;
777 #else
779  "Sub-groups are not supported on host.");
780 #endif
781  }
782 
783  // Common member functions for by-value semantics
784  friend bool operator==(const sub_group &lhs, const sub_group &rhs) {
785 #ifdef __SYCL_DEVICE_ONLY__
786  return lhs.get_group_id() == rhs.get_group_id();
787 #else
788  std::ignore = lhs;
789  std::ignore = rhs;
791  "Sub-groups are not supported on host.");
792 #endif
793  }
794 
795  friend bool operator!=(const sub_group &lhs, const sub_group &rhs) {
796 #ifdef __SYCL_DEVICE_ONLY__
797  return !(lhs == rhs);
798 #else
799  std::ignore = lhs;
800  std::ignore = rhs;
802  "Sub-groups are not supported on host.");
803 #endif
804  }
805 
806 protected:
807  template <int dimensions> friend class sycl::nd_item;
810  sub_group() = default;
811 };
812 
813 namespace ext::oneapi {
815  "use sycl::ext::oneapi::experimental::this_sub_group() instead")
816 inline sycl::sub_group this_sub_group() {
817 #ifdef __SYCL_DEVICE_ONLY__
818  return sycl::sub_group();
819 #else
821  "Sub-groups are not supported on host.");
822 #endif
823 }
824 namespace experimental {
826 #ifdef __SYCL_DEVICE_ONLY__
827  return sycl::sub_group();
828 #else
830  "Sub-groups are not supported on host.");
831 #endif
832 }
833 } // namespace experimental
834 } // namespace ext::oneapi
835 
836 } // namespace _V1
837 } // namespace sycl
Provides constructors for address space qualified and non address space qualified pointers to allow i...
Definition: multi_ptr.hpp:83
std::conditional_t< is_decorated, decorated_type *, std::add_pointer_t< value_type > > pointer
Definition: multi_ptr.hpp:95
pointer get() const
Definition: multi_ptr.hpp:293
decorated_type * get_decorated() const
Definition: multi_ptr.hpp:294
Identifies an instance of the function object executing at each point in an nd_range.
Definition: nd_item.hpp:544
defined(__INTEL_PREVIEW_BREAKING_CHANGES)
Definition: types.hpp:346
std::bool_constant<!std::is_same_v< void, SelectBlockT< T > > &&Space==access::address_space::local_space > AcceptableForLocalLoadStore
Definition: sub_group.hpp:71
select_cl_scalar_integral_unsigned_t< T > SelectBlockT
Definition: sub_group.hpp:43
auto convertToBlockPtr(MultiPtrTy MultiPtr)
Definition: sub_group.hpp:45
std::bool_constant<!std::is_same_v< void, SelectBlockT< T > > &&Space==access::address_space::global_space > AcceptableForGlobalLoadStore
Definition: sub_group.hpp:66
auto get_local_linear_range(Group g)
select_apply_cl_scalar_t< T, sycl::opencl::cl_uchar, sycl::opencl::cl_ushort, sycl::opencl::cl_uint, sycl::opencl::cl_ulong > select_cl_scalar_integral_unsigned_t
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:198
multi_ptr< T, Space, IsDecorated > GetUnqualMultiPtr(const multi_ptr< CVT, Space, IsDecorated > &Mptr)
Definition: sub_group.hpp:120
bool operator==(const cache_config &lhs, const cache_config &rhs)
sycl::sub_group this_sub_group()
__SYCL_DEPRECATED("use sycl::ext::oneapi::experimental::this_sub_group() instead") inline sycl
Definition: sub_group.hpp:814
pointer_t get() const
Definition: multi_ptr.hpp:974
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:107
typename remove_decoration< T >::type remove_decoration_t
Definition: access.hpp:325
Definition: access.hpp:18
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
_Tp reduce(const simd< _Tp, _Abi > &, _BinaryOp=_BinaryOp())
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
Definition: sub_group.hpp:516
linear_id_type get_group_linear_id() const
Definition: sub_group.hpp:198
T shuffle_up(T x, uint32_t delta) const
Definition: sub_group.hpp:245
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:585
T load(CVT *src) const
Definition: sub_group.hpp:303
range_type get_max_local_range() const
Definition: sub_group.hpp:180
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > src) const
Definition: sub_group.hpp:420
id_type get_group_id() const
Definition: sub_group.hpp:189
range_type get_local_range() const
Definition: sub_group.hpp:171
static constexpr int dimensions
Definition: sub_group.hpp:147
static constexpr sycl::memory_scope fence_scope
Definition: sub_group.hpp:148
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:314
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:597
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:334
id_type get_local_id() const
Definition: sub_group.hpp:153
void store(T *dst, const T &x) const
Definition: sub_group.hpp:485
__SYCL_DEPRECATED("Sub-group barrier accepting fence_space is deprecated." "Use barrier() without a fence_space instead.") void barrier(access
Definition: sub_group.hpp:625
void barrier() const
Definition: sub_group.hpp:611
friend bool operator!=(const sub_group &lhs, const sub_group &rhs)
Definition: sub_group.hpp:795
T shuffle(T x, id_type local_id) const
Definition: sub_group.hpp:223
linear_id_type get_local_linear_id() const
Definition: sub_group.hpp:162
T shuffle_xor(T x, id_type value) const
Definition: sub_group.hpp:256
T shuffle_down(T x, uint32_t delta) const
Definition: sub_group.hpp:234
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:432
std::enable_if_t< sycl::detail::is_scalar_arithmetic< T >::value, T > EnableIfIsScalarArithmetic
Definition: sub_group.hpp:218
range_type get_group_range() const
Definition: sub_group.hpp:207
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
Definition: sub_group.hpp:497