DPC++ Runtime
Runtime libraries for oneAPI DPC++
sub_group.hpp
Go to the documentation of this file.
1 //==----------- sub_group.hpp --- SYCL sub-group ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/access/access.hpp> // for address_space, decorated
12 #include <sycl/detail/defines_elementary.hpp> // for __SYCL_DEPRECATED
13 #include <sycl/detail/generic_type_traits.hpp> // for select_cl_scalar_inte...
14 #include <sycl/detail/pi.h> // for PI_ERROR_INVALID_DEVICE
15 #include <sycl/detail/type_traits.hpp> // for is_scalar_arithmetic
16 #include <sycl/exception.hpp> // for exception, make_error...
17 #include <sycl/id.hpp> // for id
18 #include <sycl/memory_enums.hpp> // for memory_scope
19 #include <sycl/multi_ptr.hpp> // for multi_ptr
20 #include <sycl/range.hpp> // for range
21 #include <sycl/types.hpp> // for vec
22 
23 #ifdef __SYCL_DEVICE_ONLY__
25 #endif
26 
27 #include <stdint.h> // for uint32_t
28 #include <tuple> // for _Swallow_assign, ignore
29 #include <type_traits> // for enable_if_t, remove_cv_t
30 
31 namespace sycl {
32 inline namespace _V1 {
33 template <typename T, access::address_space Space,
34  access::decorated DecorateAddress>
35 class multi_ptr;
36 
37 namespace detail {
38 
39 namespace sub_group {
40 
41 // Selects 8, 16, 32, or 64-bit type depending on size of scalar type T.
42 template <typename T>
44 
45 template <typename MultiPtrTy> auto convertToBlockPtr(MultiPtrTy MultiPtr) {
46  static_assert(is_multi_ptr_v<MultiPtrTy>);
47  auto DecoratedPtr = convertToOpenCLType(MultiPtr);
48  using DecoratedPtrTy = decltype(DecoratedPtr);
50 
51  using TargetElemTy = SelectBlockT<ElemTy>;
52  // TODO: Handle cv qualifiers.
53 #ifdef __SYCL_DEVICE_ONLY__
54  using ResultTy =
55  typename DecoratedType<TargetElemTy,
56  deduce_AS<DecoratedPtrTy>::value>::type *;
57 #else
58  using ResultTy = TargetElemTy *;
59 #endif
60  return reinterpret_cast<ResultTy>(DecoratedPtr);
61 }
62 
63 template <typename T, access::address_space Space>
65  std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
67 
68 template <typename T, access::address_space Space>
70  std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
72 
73 #ifdef __SYCL_DEVICE_ONLY__
74 template <typename T, access::address_space Space,
75  access::decorated DecorateAddress>
76 T load(const multi_ptr<T, Space, DecorateAddress> src) {
77  using BlockT = SelectBlockT<T>;
78  BlockT Ret = __spirv_SubgroupBlockReadINTEL<BlockT>(convertToBlockPtr(src));
79 
80  return sycl::bit_cast<T>(Ret);
81 }
82 
83 template <int N, typename T, access::address_space Space,
84  access::decorated DecorateAddress>
86  using BlockT = SelectBlockT<T>;
87  using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
88  VecT Ret = __spirv_SubgroupBlockReadINTEL<VecT>(convertToBlockPtr(src));
89 
90  return sycl::bit_cast<typename vec<T, N>::vector_t>(Ret);
91 }
92 
93 template <typename T, access::address_space Space,
94  access::decorated DecorateAddress>
95 void store(multi_ptr<T, Space, DecorateAddress> dst, const T &x) {
96  using BlockT = SelectBlockT<T>;
97 
98  __spirv_SubgroupBlockWriteINTEL(convertToBlockPtr(dst),
99  sycl::bit_cast<BlockT>(x));
100 }
101 
102 template <int N, typename T, access::address_space Space,
103  access::decorated DecorateAddress>
104 void store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) {
105  using BlockT = SelectBlockT<T>;
106  using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
107 
108  __spirv_SubgroupBlockWriteINTEL(convertToBlockPtr(dst),
109  sycl::bit_cast<VecT>(x));
110 }
111 #endif // __SYCL_DEVICE_ONLY__
112 
113 } // namespace sub_group
114 
115 // Helper for removing const and volatile qualifiers from the element type of
116 // a multi_ptr.
117 template <typename CVT, access::address_space Space,
118  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
119 inline multi_ptr<T, Space, IsDecorated>
121  if constexpr (IsDecorated == access::decorated::legacy) {
123  const_cast<typename multi_ptr<T, Space, IsDecorated>::pointer_t>(
124  Mptr.get())};
125  } else {
127  const_cast<typename multi_ptr<T, Space, IsDecorated>::pointer>(
128  Mptr.get_decorated())};
129  }
130 }
131 
132 } // namespace detail
133 
134 struct sub_group;
135 namespace ext::oneapi::this_work_item {
137 } // namespace ext::oneapi::this_work_item
138 
139 struct sub_group {
140 
141  using id_type = id<1>;
143  using linear_id_type = uint32_t;
144  static constexpr int dimensions = 1;
145  static constexpr sycl::memory_scope fence_scope =
147 
148  /* --- common interface members --- */
149 
151 #ifdef __SYCL_DEVICE_ONLY__
152  return __spirv_SubgroupLocalInvocationId();
153 #else
155  "Sub-groups are not supported on host.");
156 #endif
157  }
158 
160 #ifdef __SYCL_DEVICE_ONLY__
161  return static_cast<linear_id_type>(get_local_id()[0]);
162 #else
164  "Sub-groups are not supported on host.");
165 #endif
166  }
167 
169 #ifdef __SYCL_DEVICE_ONLY__
170  return __spirv_SubgroupSize();
171 #else
173  "Sub-groups are not supported on host.");
174 #endif
175  }
176 
178 #ifdef __SYCL_DEVICE_ONLY__
179  return __spirv_SubgroupMaxSize();
180 #else
182  "Sub-groups are not supported on host.");
183 #endif
184  }
185 
187 #ifdef __SYCL_DEVICE_ONLY__
188  return __spirv_SubgroupId();
189 #else
191  "Sub-groups are not supported on host.");
192 #endif
193  }
194 
196 #ifdef __SYCL_DEVICE_ONLY__
197  return static_cast<linear_id_type>(get_group_id()[0]);
198 #else
200  "Sub-groups are not supported on host.");
201 #endif
202  }
203 
205 #ifdef __SYCL_DEVICE_ONLY__
206  return __spirv_NumSubgroups();
207 #else
209  "Sub-groups are not supported on host.");
210 #endif
211  }
212 
213  template <typename T>
215  std::enable_if_t<sycl::detail::is_scalar_arithmetic<T>::value, T>;
216 
217  /* --- one-input shuffles --- */
218  /* indices in [0 , sub_group size) */
219  template <typename T>
220  __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.")
221  T shuffle(T x, id_type local_id) const {
222 #ifdef __SYCL_DEVICE_ONLY__
223  return sycl::detail::spirv::Shuffle(*this, x, local_id);
224 #else
225  (void)x;
226  (void)local_id;
228  "Sub-groups are not supported on host.");
229 #endif
230  }
231 
232  template <typename T>
233  __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.")
234  T shuffle_down(T x, uint32_t delta) const {
235 #ifdef __SYCL_DEVICE_ONLY__
236  return sycl::detail::spirv::ShuffleDown(*this, x, delta);
237 #else
238  (void)x;
239  (void)delta;
241  "Sub-groups are not supported on host.");
242 #endif
243  }
244 
245  template <typename T>
246  __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.")
247  T shuffle_up(T x, uint32_t delta) const {
248 #ifdef __SYCL_DEVICE_ONLY__
249  return sycl::detail::spirv::ShuffleUp(*this, x, delta);
250 #else
251  (void)x;
252  (void)delta;
254  "Sub-groups are not supported on host.");
255 #endif
256  }
257 
258  template <typename T>
259  __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.")
260  T shuffle_xor(T x, id_type value) const {
261 #ifdef __SYCL_DEVICE_ONLY__
262  return sycl::detail::spirv::ShuffleXor(*this, x, value);
263 #else
264  (void)x;
265  (void)value;
267  "Sub-groups are not supported on host.");
268 #endif
269  }
270 
271  /* --- sub_group load/stores --- */
272  /* these can map to SIMD or block read/write hardware where available */
273 #ifdef __SYCL_DEVICE_ONLY__
274  // Method for decorated pointer
275  template <typename CVT, typename T = std::remove_cv_t<CVT>>
276  std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value, T>
277  load(CVT *cv_src) const {
278  T *src = const_cast<T *>(cv_src);
280  sycl::detail::deduce_AS<T>::value,
281  sycl::access::decorated::yes>(src));
282  }
283 
284  // Method for raw pointer
285  template <typename CVT, typename T = std::remove_cv_t<CVT>>
286  std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value, T>
287  load(CVT *cv_src) const {
288  T *src = const_cast<T *>(cv_src);
289 
290 #if defined(__NVPTX__) || defined(__AMDGCN__)
291  return src[get_local_id()[0]];
292 #else // __NVPTX__ || __AMDGCN__
293  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(src);
294  if (l)
295  return load(l);
296 
297  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(src);
298  if (g)
299  return load(g);
300 
301  assert(!"Sub-group load() is supported for local or global pointers only.");
302  return {};
303 #endif // __NVPTX__ || __AMDGCN__
304  }
305 #else //__SYCL_DEVICE_ONLY__
306  template <typename CVT, typename T = std::remove_cv_t<CVT>>
307  T load(CVT *src) const {
308  (void)src;
310  "Sub-groups are not supported on host.");
311  }
312 #endif //__SYCL_DEVICE_ONLY__
313 
314  template <typename CVT, access::address_space Space,
315  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
316  std::enable_if_t<
321 #ifdef __SYCL_DEVICE_ONLY__
322 #if defined(__NVPTX__) || defined(__AMDGCN__)
323  return src.get()[get_local_id()[0]];
324 #else
325  return sycl::detail::sub_group::load(src);
326 #endif // __NVPTX__ || __AMDGCN__
327 #else
328  (void)src;
330  "Sub-groups are not supported on host.");
331 #endif // __SYCL_DEVICE_ONLY__
332  }
333 
334  template <typename CVT, access::address_space Space,
335  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
336  std::enable_if_t<
341 #ifdef __SYCL_DEVICE_ONLY__
342  return src.get()[get_local_id()[0]];
343 #else
344  (void)src;
346  "Sub-groups are not supported on host.");
347 #endif
348  }
349 #ifdef __SYCL_DEVICE_ONLY__
350 #if defined(__NVPTX__) || defined(__AMDGCN__)
351  template <int N, typename CVT, access::address_space Space,
352  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
353  std::enable_if_t<
355  vec<T, N>>
356  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
359  vec<T, N> res;
360  for (int i = 0; i < N; ++i) {
361  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
362  }
363  return res;
364  }
365 #else // __NVPTX__ || __AMDGCN__
366  template <int N, typename CVT, access::address_space Space,
367  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
368  std::enable_if_t<
370  N != 1 && N != 3 && N != 16,
371  vec<T, N>>
372  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
373  multi_ptr<T, Space, IsDecorated> src =
375  return sycl::detail::sub_group::load<N, T>(src);
376  }
377 
378  template <int N, typename CVT, access::address_space Space,
379  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
380  std::enable_if_t<
382  N == 16,
383  vec<T, 16>>
384  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
385  multi_ptr<T, Space, IsDecorated> src =
387  return {sycl::detail::sub_group::load<8, T>(src),
388  sycl::detail::sub_group::load<8, T>(src +
389  8 * get_max_local_range()[0])};
390  }
391 
392  template <int N, typename CVT, access::address_space Space,
393  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
394  std::enable_if_t<
396  N == 3,
397  vec<T, 3>>
398  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
399  multi_ptr<T, Space, IsDecorated> src =
401  return {
402  sycl::detail::sub_group::load<1, T>(src),
403  sycl::detail::sub_group::load<2, T>(src + get_max_local_range()[0])};
404  }
405 
406  template <int N, typename CVT, access::address_space Space,
407  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
408  std::enable_if_t<
410  N == 1,
411  vec<T, 1>>
412  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
413  multi_ptr<T, Space, IsDecorated> src =
415  return sycl::detail::sub_group::load(src);
416  }
417 #endif // ___NVPTX___
418 #else // __SYCL_DEVICE_ONLY__
419  template <int N, typename CVT, access::address_space Space,
420  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
421  std::enable_if_t<
423  vec<T, N>>
425  (void)src;
427  "Sub-groups are not supported on host.");
428  }
429 #endif // __SYCL_DEVICE_ONLY__
430 
431  template <int N, typename CVT, access::address_space Space,
432  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
433  std::enable_if_t<
435  vec<T, N>>
439 #ifdef __SYCL_DEVICE_ONLY__
440  vec<T, N> res;
441  for (int i = 0; i < N; ++i) {
442  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
443  }
444  return res;
445 #else
446  (void)src;
448  "Sub-groups are not supported on host.");
449 #endif
450  }
451 
452 #ifdef __SYCL_DEVICE_ONLY__
453  // Method for decorated pointer
454  template <typename T>
455  std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value>
456  store(T *dst, const remove_decoration_t<T> &x) const {
458  sycl::detail::deduce_AS<T>::value,
459  sycl::access::decorated::yes>(dst),
460  x);
461  }
462 
463  // Method for raw pointer
464  template <typename T>
465  std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value>
466  store(T *dst, const remove_decoration_t<T> &x) const {
467 
468 #if defined(__NVPTX__) || defined(__AMDGCN__)
469  dst[get_local_id()[0]] = x;
470 #else // __NVPTX__ || __AMDGCN__
471  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(dst);
472  if (l) {
473  store(l, x);
474  return;
475  }
476 
477  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(dst);
478  if (g) {
479  store(g, x);
480  return;
481  }
482 
483  assert(
484  !"Sub-group store() is supported for local or global pointers only.");
485  return;
486 #endif // __NVPTX__ || __AMDGCN__
487  }
488 #else //__SYCL_DEVICE_ONLY__
489  template <typename T> void store(T *dst, const T &x) const {
490  (void)dst;
491  (void)x;
493  "Sub-groups are not supported on host.");
494  }
495 #endif //__SYCL_DEVICE_ONLY__
496 
497  template <typename T, access::address_space Space,
498  access::decorated DecorateAddress>
499  std::enable_if_t<
502 #ifdef __SYCL_DEVICE_ONLY__
503 #if defined(__NVPTX__) || defined(__AMDGCN__)
504  dst.get()[get_local_id()[0]] = x;
505 #else
506  sycl::detail::sub_group::store(dst, x);
507 #endif // __NVPTX__ || __AMDGCN__
508 #else
509  (void)dst;
510  (void)x;
512  "Sub-groups are not supported on host.");
513 #endif
514  }
515 
516  template <typename T, access::address_space Space,
517  access::decorated DecorateAddress>
518  std::enable_if_t<
521 #ifdef __SYCL_DEVICE_ONLY__
522  dst.get()[get_local_id()[0]] = x;
523 #else
524  (void)dst;
525  (void)x;
527  "Sub-groups are not supported on host.");
528 #endif
529  }
530 
531 #ifdef __SYCL_DEVICE_ONLY__
532 #if defined(__NVPTX__) || defined(__AMDGCN__)
533  template <int N, typename T, access::address_space Space,
534  access::decorated DecorateAddress>
535  std::enable_if_t<
538  for (int i = 0; i < N; ++i) {
539  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
540  }
541  }
542 #else // __NVPTX__ || __AMDGCN__
543  template <int N, typename T, access::address_space Space,
544  access::decorated DecorateAddress>
545  std::enable_if_t<
547  N != 1 && N != 3 && N != 16>
548  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) const {
549  sycl::detail::sub_group::store(dst, x);
550  }
551 
552  template <int N, typename T, access::address_space Space,
553  access::decorated DecorateAddress>
554  std::enable_if_t<
556  N == 1>
557  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 1> &x) const {
558  sycl::detail::sub_group::store(dst, x);
559  }
560 
561  template <int N, typename T, access::address_space Space,
562  access::decorated DecorateAddress>
563  std::enable_if_t<
565  N == 3>
566  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 3> &x) const {
567  store<1, T, Space, DecorateAddress>(dst, x.s0());
568  store<2, T, Space, DecorateAddress>(dst + get_max_local_range()[0],
569  {x.s1(), x.s2()});
570  }
571 
572  template <int N, typename T, access::address_space Space,
573  access::decorated DecorateAddress>
574  std::enable_if_t<
576  N == 16>
577  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 16> &x) const {
578  store<8, T, Space, DecorateAddress>(dst, x.lo());
579  store<8, T, Space, DecorateAddress>(dst + 8 * get_max_local_range()[0],
580  x.hi());
581  }
582 
583 #endif // __NVPTX__ || __AMDGCN__
584 #else // __SYCL_DEVICE_ONLY__
585  template <int N, typename T, access::address_space Space,
586  access::decorated DecorateAddress>
587  std::enable_if_t<
590  (void)dst;
591  (void)x;
593  "Sub-groups are not supported on host.");
594  }
595 #endif // __SYCL_DEVICE_ONLY__
596 
597  template <int N, typename T, access::address_space Space,
598  access::decorated DecorateAddress>
599  std::enable_if_t<
602 #ifdef __SYCL_DEVICE_ONLY__
603  for (int i = 0; i < N; ++i) {
604  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
605  }
606 #else
607  (void)dst;
608  (void)x;
610  "Sub-groups are not supported on host.");
611 #endif
612  }
613 
614  /* --- synchronization functions --- */
616  "Sub-group barrier with no arguments is deprecated."
617  "Use sycl::group_barrier with the sub-group as the argument instead.")
618  void barrier() const {
619 #ifdef __SYCL_DEVICE_ONLY__
626 #else
628  "Sub-groups are not supported on host.");
629 #endif
630  }
631 
633  "Sub-group barrier accepting fence_space is deprecated."
634  "Use sycl::group_barrier with the sub-group as the argument instead.")
635  void barrier(access::fence_space accessSpace) const {
636 #ifdef __SYCL_DEVICE_ONLY__
637  int32_t flags = sycl::detail::getSPIRVMemorySemanticsMask(accessSpace);
639  flags);
640 #else
641  (void)accessSpace;
643  "Sub-groups are not supported on host.");
644 #endif
645  }
646 
647 #ifndef __INTEL_PREVIEW_BREAKING_CHANGES__
648  /* --- deprecated collective functions --- */
649  template <typename T>
650  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
651  "sycl::ext::oneapi::broadcast instead.")
652  EnableIfIsScalarArithmetic<T> broadcast(T x, id<1> local_id) const {
653 #ifdef __SYCL_DEVICE_ONLY__
654  return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
655 #else
656  (void)x;
657  (void)local_id;
659  "Sub-groups are not supported on host.");
660 #endif
661  }
662 
663  template <typename T, class BinaryOperation>
664  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
665  "sycl::ext::oneapi::reduce instead.")
666  EnableIfIsScalarArithmetic<T> reduce(T x, BinaryOperation op) const {
667 #ifdef __SYCL_DEVICE_ONLY__
668  return sycl::detail::calc<__spv::GroupOperation::Reduce>(
669  typename sycl::detail::GroupOpTag<T>::type(), *this, x, op);
670 #else
671  (void)x;
672  (void)op;
674  "Sub-groups are not supported on host.");
675 #endif
676  }
677 
678  template <typename T, class BinaryOperation>
679  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
680  "sycl::ext::oneapi::reduce instead.")
681  EnableIfIsScalarArithmetic<T> reduce(T x, T init, BinaryOperation op) const {
682 #ifdef __SYCL_DEVICE_ONLY__
683  return op(init, reduce(x, op));
684 #else
685  (void)x;
686  (void)init;
687  (void)op;
689  "Sub-groups are not supported on host.");
690 #endif
691  }
692 
693  template <typename T, class BinaryOperation>
694  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
695  "sycl::ext::oneapi::exclusive_scan instead.")
696  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, BinaryOperation op) const {
697 #ifdef __SYCL_DEVICE_ONLY__
698  return sycl::detail::calc<__spv::GroupOperation::ExclusiveScan>(
699  typename sycl::detail::GroupOpTag<T>::type(), *this, x, op);
700 #else
701  (void)x;
702  (void)op;
704  "Sub-groups are not supported on host.");
705 #endif
706  }
707 
708  template <typename T, class BinaryOperation>
709  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
710  "sycl::ext::oneapi::exclusive_scan instead.")
711  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, T init,
712  BinaryOperation op) const {
713 #ifdef __SYCL_DEVICE_ONLY__
714  if (get_local_id().get(0) == 0) {
715  x = op(init, x);
716  }
717  T scan = exclusive_scan(x, op);
718  if (get_local_id().get(0) == 0) {
719  scan = init;
720  }
721  return scan;
722 #else
723  (void)x;
724  (void)init;
725  (void)op;
727  "Sub-groups are not supported on host.");
728 #endif
729  }
730 
731  template <typename T, class BinaryOperation>
732  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
733  "sycl::ext::oneapi::inclusive_scan instead.")
734  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op) const {
735 #ifdef __SYCL_DEVICE_ONLY__
736  return sycl::detail::calc<__spv::GroupOperation::InclusiveScan>(
737  typename sycl::detail::GroupOpTag<T>::type(), *this, x, op);
738 #else
739  (void)x;
740  (void)op;
742  "Sub-groups are not supported on host.");
743 #endif
744  }
745 
746  template <typename T, class BinaryOperation>
747  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
748  "sycl::ext::oneapi::inclusive_scan instead.")
749  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op,
750  T init) const {
751 #ifdef __SYCL_DEVICE_ONLY__
752  if (get_local_id().get(0) == 0) {
753  x = op(init, x);
754  }
755  return inclusive_scan(x, op);
756 #else
757  (void)x;
758  (void)op;
759  (void)init;
761  "Sub-groups are not supported on host.");
762 #endif
763  }
764 #endif // __INTEL_PREVIEW_BREAKING_CHANGES__
765 
766  linear_id_type get_group_linear_range() const {
767 #ifdef __SYCL_DEVICE_ONLY__
768  return static_cast<linear_id_type>(get_group_range()[0]);
769 #else
771  "Sub-groups are not supported on host.");
772 #endif
773  }
774 
776 #ifdef __SYCL_DEVICE_ONLY__
777  return static_cast<linear_id_type>(get_local_range()[0]);
778 #else
780  "Sub-groups are not supported on host.");
781 #endif
782  }
783 
784  bool leader() const {
785 #ifdef __SYCL_DEVICE_ONLY__
786  return get_local_linear_id() == 0;
787 #else
789  "Sub-groups are not supported on host.");
790 #endif
791  }
792 
793  // Common member functions for by-value semantics
794  friend bool operator==(const sub_group &lhs, const sub_group &rhs) {
795 #ifdef __SYCL_DEVICE_ONLY__
796  return lhs.get_group_id() == rhs.get_group_id();
797 #else
798  std::ignore = lhs;
799  std::ignore = rhs;
801  "Sub-groups are not supported on host.");
802 #endif
803  }
804 
805  friend bool operator!=(const sub_group &lhs, const sub_group &rhs) {
806 #ifdef __SYCL_DEVICE_ONLY__
807  return !(lhs == rhs);
808 #else
809  std::ignore = lhs;
810  std::ignore = rhs;
812  "Sub-groups are not supported on host.");
813 #endif
814  }
815 
816 protected:
817  template <int dimensions> friend class sycl::nd_item;
819  sub_group() = default;
820 };
821 } // namespace _V1
822 } // namespace sycl
Identifies an instance of the function object executing at each point in an nd_range.
Definition: nd_item.hpp:48
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
Definition: types.hpp:284
__ESIMD_API void barrier()
Generic work-group barrier.
Definition: memory.hpp:7951
std::bool_constant<!std::is_same_v< void, SelectBlockT< T > > &&Space==access::address_space::local_space > AcceptableForLocalLoadStore
Definition: sub_group.hpp:71
select_cl_scalar_integral_unsigned_t< T > SelectBlockT
Definition: sub_group.hpp:43
auto convertToBlockPtr(MultiPtrTy MultiPtr)
Definition: sub_group.hpp:45
std::bool_constant<!std::is_same_v< void, SelectBlockT< T > > &&Space==access::address_space::global_space > AcceptableForGlobalLoadStore
Definition: sub_group.hpp:66
auto get_local_linear_range(Group g)
select_apply_cl_scalar_t< T, sycl::opencl::cl_uchar, sycl::opencl::cl_ushort, sycl::opencl::cl_uint, sycl::opencl::cl_ulong > select_cl_scalar_integral_unsigned_t
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:198
multi_ptr< T, Space, IsDecorated > GetUnqualMultiPtr(const multi_ptr< CVT, Space, IsDecorated > &Mptr)
Definition: sub_group.hpp:120
bool operator==(const cache_config &lhs, const cache_config &rhs)
pointer get() const
Definition: multi_ptr.hpp:544
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:87
autodecltype(x) x
typename remove_decoration< T >::type remove_decoration_t
Definition: access.hpp:325
Definition: access.hpp:18
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
_Tp reduce(const simd< _Tp, _Abi > &, _BinaryOp=_BinaryOp())
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
Definition: sub_group.hpp:520
linear_id_type get_group_linear_id() const
Definition: sub_group.hpp:195
T shuffle_up(T x, uint32_t delta) const
Definition: sub_group.hpp:247
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:589
T load(CVT *src) const
Definition: sub_group.hpp:307
__SYCL_DEPRECATED("Sub-group barrier with no arguments is deprecated." "Use sycl::group_barrier with the sub-group as the argument instead.") void barrier() const
Definition: sub_group.hpp:615
range_type get_max_local_range() const
Definition: sub_group.hpp:177
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > src) const
Definition: sub_group.hpp:424
id_type get_group_id() const
Definition: sub_group.hpp:186
range_type get_local_range() const
Definition: sub_group.hpp:168
static constexpr int dimensions
Definition: sub_group.hpp:144
static constexpr sycl::memory_scope fence_scope
Definition: sub_group.hpp:145
__SYCL_DEPRECATED("Sub-group barrier accepting fence_space is deprecated." "Use sycl::group_barrier with the sub-group as the argument instead.") void barrier(access
Definition: sub_group.hpp:632
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:318
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:601
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:338
id_type get_local_id() const
Definition: sub_group.hpp:150
void store(T *dst, const T &x) const
Definition: sub_group.hpp:489
friend bool operator!=(const sub_group &lhs, const sub_group &rhs)
Definition: sub_group.hpp:805
T shuffle(T x, id_type local_id) const
Definition: sub_group.hpp:221
linear_id_type get_local_linear_id() const
Definition: sub_group.hpp:159
T shuffle_xor(T x, id_type value) const
Definition: sub_group.hpp:260
T shuffle_down(T x, uint32_t delta) const
Definition: sub_group.hpp:234
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:436
std::enable_if_t< sycl::detail::is_scalar_arithmetic< T >::value, T > EnableIfIsScalarArithmetic
Definition: sub_group.hpp:215
range_type get_group_range() const
Definition: sub_group.hpp:204
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
Definition: sub_group.hpp:501