DPC++ Runtime
Runtime libraries for oneAPI DPC++
sub_group.hpp
Go to the documentation of this file.
1 //==----------- sub_group.hpp --- SYCL sub-group ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_ops.hpp>
13 #include <sycl/access/access.hpp>
14 #include <sycl/detail/defines.hpp>
16 #include <sycl/detail/helpers.hpp>
17 #include <sycl/detail/spirv.hpp>
20 #include <sycl/id.hpp>
21 #include <sycl/memory_enums.hpp>
22 #include <sycl/range.hpp>
23 #include <sycl/types.hpp>
24 
25 #include <type_traits>
26 
27 namespace sycl {
29 template <typename T, access::address_space Space,
30  access::decorated DecorateAddress>
31 class multi_ptr;
32 
33 namespace detail {
34 
35 namespace sub_group {
36 
37 // Selects 8, 16, 32, or 64-bit type depending on size of scalar type T.
38 template <typename T>
40 
41 template <typename T, access::address_space Space>
44  Space == access::address_space::global_space>;
45 
46 template <typename T, access::address_space Space>
49  Space == access::address_space::local_space>;
50 
51 #ifdef __SYCL_DEVICE_ONLY__
52 template <typename T, access::address_space Space,
53  access::decorated DecorateAddress>
54 T load(const multi_ptr<T, Space, DecorateAddress> src) {
55  using BlockT = SelectBlockT<T>;
58 
59  BlockT Ret =
60  __spirv_SubgroupBlockReadINTEL<BlockT>(reinterpret_cast<PtrT>(src.get()));
61 
62  return sycl::bit_cast<T>(Ret);
63 }
64 
65 template <int N, typename T, access::address_space Space,
66  access::decorated DecorateAddress>
68  using BlockT = SelectBlockT<T>;
69  using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
72 
73  VecT Ret =
74  __spirv_SubgroupBlockReadINTEL<VecT>(reinterpret_cast<PtrT>(src.get()));
75 
76  return sycl::bit_cast<typename vec<T, N>::vector_t>(Ret);
77 }
78 
79 template <typename T, access::address_space Space,
80  access::decorated DecorateAddress>
81 void store(multi_ptr<T, Space, DecorateAddress> dst, const T &x) {
82  using BlockT = SelectBlockT<T>;
85 
86  __spirv_SubgroupBlockWriteINTEL(reinterpret_cast<PtrT>(dst.get()),
87  sycl::bit_cast<BlockT>(x));
88 }
89 
90 template <int N, typename T, access::address_space Space,
91  access::decorated DecorateAddress>
92 void store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) {
93  using BlockT = SelectBlockT<T>;
94  using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
96  const multi_ptr<BlockT, Space, DecorateAddress>>;
97 
98  __spirv_SubgroupBlockWriteINTEL(reinterpret_cast<PtrT>(dst.get()),
99  sycl::bit_cast<VecT>(x));
100 }
101 #endif // __SYCL_DEVICE_ONLY__
102 
103 } // namespace sub_group
104 
105 // Helper for removing const and volatile qualifiers from the element type of
106 // a multi_ptr.
107 template <typename CVT, access::address_space Space,
108  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
109 inline multi_ptr<T, Space, IsDecorated>
111  if constexpr (IsDecorated == access::decorated::legacy) {
113  const_cast<typename multi_ptr<T, Space, IsDecorated>::pointer_t>(
114  Mptr.get())};
115  } else {
117  const_cast<typename multi_ptr<T, Space, IsDecorated>::pointer>(
118  Mptr.get_decorated())};
119  }
120 }
121 
122 } // namespace detail
123 
124 namespace ext::oneapi {
125 
126 struct sub_group;
127 namespace experimental {
128 inline sub_group this_sub_group();
129 }
130 
131 struct sub_group {
132 
133  using id_type = id<1>;
135  using linear_id_type = uint32_t;
136  static constexpr int dimensions = 1;
137  static constexpr sycl::memory_scope fence_scope =
138  sycl::memory_scope::sub_group;
139 
140  /* --- common interface members --- */
141 
143 #ifdef __SYCL_DEVICE_ONLY__
144  return __spirv_SubgroupLocalInvocationId();
145 #else
146  throw runtime_error("Sub-groups are not supported on host device.",
147  PI_ERROR_INVALID_DEVICE);
148 #endif
149  }
150 
152 #ifdef __SYCL_DEVICE_ONLY__
153  return static_cast<linear_id_type>(get_local_id()[0]);
154 #else
155  throw runtime_error("Sub-groups are not supported on host device.",
156  PI_ERROR_INVALID_DEVICE);
157 #endif
158  }
159 
161 #ifdef __SYCL_DEVICE_ONLY__
162  return __spirv_SubgroupSize();
163 #else
164  throw runtime_error("Sub-groups are not supported on host device.",
165  PI_ERROR_INVALID_DEVICE);
166 #endif
167  }
168 
170 #ifdef __SYCL_DEVICE_ONLY__
171  return __spirv_SubgroupMaxSize();
172 #else
173  throw runtime_error("Sub-groups are not supported on host device.",
174  PI_ERROR_INVALID_DEVICE);
175 #endif
176  }
177 
179 #ifdef __SYCL_DEVICE_ONLY__
180  return __spirv_SubgroupId();
181 #else
182  throw runtime_error("Sub-groups are not supported on host device.",
183  PI_ERROR_INVALID_DEVICE);
184 #endif
185  }
186 
188 #ifdef __SYCL_DEVICE_ONLY__
189  return static_cast<linear_id_type>(get_group_id()[0]);
190 #else
191  throw runtime_error("Sub-groups are not supported on host device.",
192  PI_ERROR_INVALID_DEVICE);
193 #endif
194  }
195 
197 #ifdef __SYCL_DEVICE_ONLY__
198  return __spirv_NumSubgroups();
199 #else
200  throw runtime_error("Sub-groups are not supported on host device.",
201  PI_ERROR_INVALID_DEVICE);
202 #endif
203  }
204 
205  template <typename T>
207  std::enable_if_t<sycl::detail::is_scalar_arithmetic<T>::value, T>;
208 
209  /* --- one-input shuffles --- */
210  /* indices in [0 , sub_group size) */
211 
212  template <typename T> T shuffle(T x, id_type local_id) const {
213 #ifdef __SYCL_DEVICE_ONLY__
214  return sycl::detail::spirv::SubgroupShuffle(x, local_id);
215 #else
216  (void)x;
217  (void)local_id;
218  throw runtime_error("Sub-groups are not supported on host device.",
219  PI_ERROR_INVALID_DEVICE);
220 #endif
221  }
222 
223  template <typename T> T shuffle_down(T x, uint32_t delta) const {
224 #ifdef __SYCL_DEVICE_ONLY__
225  return sycl::detail::spirv::SubgroupShuffleDown(x, delta);
226 #else
227  (void)x;
228  (void)delta;
229  throw runtime_error("Sub-groups are not supported on host device.",
230  PI_ERROR_INVALID_DEVICE);
231 #endif
232  }
233 
234  template <typename T> T shuffle_up(T x, uint32_t delta) const {
235 #ifdef __SYCL_DEVICE_ONLY__
236  return sycl::detail::spirv::SubgroupShuffleUp(x, delta);
237 #else
238  (void)x;
239  (void)delta;
240  throw runtime_error("Sub-groups are not supported on host device.",
241  PI_ERROR_INVALID_DEVICE);
242 #endif
243  }
244 
245  template <typename T> T shuffle_xor(T x, id_type value) const {
246 #ifdef __SYCL_DEVICE_ONLY__
247  return sycl::detail::spirv::SubgroupShuffleXor(x, value);
248 #else
249  (void)x;
250  (void)value;
251  throw runtime_error("Sub-groups are not supported on host device.",
252  PI_ERROR_INVALID_DEVICE);
253 #endif
254  }
255 
256  /* --- sub_group load/stores --- */
257  /* these can map to SIMD or block read/write hardware where available */
258 #ifdef __SYCL_DEVICE_ONLY__
259  // Method for decorated pointer
260  template <typename CVT, typename T = std::remove_cv_t<CVT>>
261  std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value, T>
262  load(CVT *cv_src) const {
263  T *src = const_cast<T *>(cv_src);
264  return load(sycl::multi_ptr<remove_decoration_t<T>,
265  sycl::detail::deduce_AS<T>::value,
266  sycl::access::decorated::yes>(src));
267  }
268 
269  // Method for raw pointer
270  template <typename CVT, typename T = std::remove_cv_t<CVT>>
271  std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value, T>
272  load(CVT *cv_src) const {
273  T *src = const_cast<T *>(cv_src);
274 
275 #ifdef __NVPTX__
276  return src[get_local_id()[0]];
277 #else // __NVPTX__
278  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(src);
279  if (l)
280  return load(l);
281 
282  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(src);
283  if (g)
284  return load(g);
285 
286  assert(!"Sub-group load() is supported for local or global pointers only.");
287  return {};
288 #endif // __NVPTX__
289  }
290 #else //__SYCL_DEVICE_ONLY__
291  template <typename CVT, typename T = std::remove_cv_t<CVT>>
292  T load(CVT *src) const {
293  (void)src;
294  throw runtime_error("Sub-groups are not supported on host device.",
295  PI_ERROR_INVALID_DEVICE);
296  }
297 #endif //__SYCL_DEVICE_ONLY__
298 
299  template <typename CVT, access::address_space Space,
300  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
302  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value, T>
306 #ifdef __SYCL_DEVICE_ONLY__
307 #ifdef __NVPTX__
308  return src.get()[get_local_id()[0]];
309 #else
310  return sycl::detail::sub_group::load(src);
311 #endif // __NVPTX__
312 #else
313  (void)src;
314  throw runtime_error("Sub-groups are not supported on host device.",
315  PI_ERROR_INVALID_DEVICE);
316 #endif
317  }
318 
319  template <typename CVT, access::address_space Space,
320  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
322  sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value, T>
326 #ifdef __SYCL_DEVICE_ONLY__
327  return src.get()[get_local_id()[0]];
328 #else
329  (void)src;
330  throw runtime_error("Sub-groups are not supported on host device.",
331  PI_ERROR_INVALID_DEVICE);
332 #endif
333  }
334 #ifdef __SYCL_DEVICE_ONLY__
335 #ifdef __NVPTX__
336  template <int N, typename CVT, access::address_space Space,
337  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
339  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
340  vec<T, N>>
341  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
344  vec<T, N> res;
345  for (int i = 0; i < N; ++i) {
346  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
347  }
348  return res;
349  }
350 #else // __NVPTX__
351  template <int N, typename CVT, access::address_space Space,
352  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
354  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
355  N != 1 && N != 3 && N != 16,
356  vec<T, N>>
357  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
360  return sycl::detail::sub_group::load<N, T>(src);
361  }
362 
363  template <int N, typename CVT, access::address_space Space,
364  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
366  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
367  N == 16,
368  vec<T, 16>>
369  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
370  multi_ptr<T, Space, IsDecorated> src =
372  return {sycl::detail::sub_group::load<8, T>(src),
373  sycl::detail::sub_group::load<8, T>(src +
374  8 * get_max_local_range()[0])};
375  }
376 
377  template <int N, typename CVT, access::address_space Space,
378  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
380  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
381  N == 3,
382  vec<T, 3>>
383  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
384  multi_ptr<T, Space, IsDecorated> src =
386  return {
387  sycl::detail::sub_group::load<1, T>(src),
388  sycl::detail::sub_group::load<2, T>(src + get_max_local_range()[0])};
389  }
390 
391  template <int N, typename CVT, access::address_space Space,
392  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
394  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
395  N == 1,
396  vec<T, 1>>
397  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
398  multi_ptr<T, Space, IsDecorated> src =
400  return sycl::detail::sub_group::load(src);
401  }
402 #endif // ___NVPTX___
403 #else // __SYCL_DEVICE_ONLY__
404  template <int N, typename CVT, access::address_space Space,
405  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
407  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
408  vec<T, N>>
410  (void)src;
411  throw runtime_error("Sub-groups are not supported on host device.",
412  PI_ERROR_INVALID_DEVICE);
413  }
414 #endif // __SYCL_DEVICE_ONLY__
415 
416  template <int N, typename CVT, access::address_space Space,
417  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
419  sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value,
420  vec<T, N>>
424 #ifdef __SYCL_DEVICE_ONLY__
425  vec<T, N> res;
426  for (int i = 0; i < N; ++i) {
427  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
428  }
429  return res;
430 #else
431  (void)src;
432  throw runtime_error("Sub-groups are not supported on host device.",
433  PI_ERROR_INVALID_DEVICE);
434 #endif
435  }
436 
437 #ifdef __SYCL_DEVICE_ONLY__
438  // Method for decorated pointer
439  template <typename T>
440  std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value>
441  store(T *dst, const remove_decoration_t<T> &x) const {
442  store(sycl::multi_ptr<remove_decoration_t<T>,
443  sycl::detail::deduce_AS<T>::value,
444  sycl::access::decorated::yes>(dst),
445  x);
446  }
447 
448  // Method for raw pointer
449  template <typename T>
450  std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value>
451  store(T *dst, const remove_decoration_t<T> &x) const {
452 
453 #ifdef __NVPTX__
454  dst[get_local_id()[0]] = x;
455 #else // __NVPTX__
456  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(dst);
457  if (l) {
458  store(l, x);
459  return;
460  }
461 
462  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(dst);
463  if (g) {
464  store(g, x);
465  return;
466  }
467 
468  assert(
469  !"Sub-group store() is supported for local or global pointers only.");
470  return;
471 #endif // __NVPTX__
472  }
473 #else //__SYCL_DEVICE_ONLY__
474  template <typename T> void store(T *dst, const T &x) const {
475  (void)dst;
476  (void)x;
477  throw runtime_error("Sub-groups are not supported on host device.",
478  PI_ERROR_INVALID_DEVICE);
479  }
480 #endif //__SYCL_DEVICE_ONLY__
481 
482  template <typename T, access::address_space Space,
483  access::decorated DecorateAddress>
485  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
487 #ifdef __SYCL_DEVICE_ONLY__
488 #ifdef __NVPTX__
489  dst.get()[get_local_id()[0]] = x;
490 #else
491  sycl::detail::sub_group::store(dst, x);
492 #endif // __NVPTX__
493 #else
494  (void)dst;
495  (void)x;
496  throw runtime_error("Sub-groups are not supported on host device.",
497  PI_ERROR_INVALID_DEVICE);
498 #endif
499  }
500 
501  template <typename T, access::address_space Space,
502  access::decorated DecorateAddress>
504  sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value>
506 #ifdef __SYCL_DEVICE_ONLY__
507  dst.get()[get_local_id()[0]] = x;
508 #else
509  (void)dst;
510  (void)x;
511  throw runtime_error("Sub-groups are not supported on host device.",
512  PI_ERROR_INVALID_DEVICE);
513 #endif
514  }
515 
516 #ifdef __SYCL_DEVICE_ONLY__
517 #ifdef __NVPTX__
518  template <int N, typename T, access::address_space Space,
519  access::decorated DecorateAddress>
521  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
522  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) const {
523  for (int i = 0; i < N; ++i) {
524  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
525  }
526  }
527 #else // __NVPTX__
528  template <int N, typename T, access::address_space Space,
529  access::decorated DecorateAddress>
531  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
532  N != 1 && N != 3 && N != 16>
533  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) const {
534  sycl::detail::sub_group::store(dst, x);
535  }
536 
537  template <int N, typename T, access::address_space Space,
538  access::decorated DecorateAddress>
540  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
541  N == 1>
542  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 1> &x) const {
543  sycl::detail::sub_group::store(dst, x);
544  }
545 
546  template <int N, typename T, access::address_space Space,
547  access::decorated DecorateAddress>
549  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
550  N == 3>
551  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 3> &x) const {
552  store<1, T, Space, DecorateAddress>(dst, x.s0());
553  store<2, T, Space, DecorateAddress>(dst + get_max_local_range()[0],
554  {x.s1(), x.s2()});
555  }
556 
557  template <int N, typename T, access::address_space Space,
558  access::decorated DecorateAddress>
560  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
561  N == 16>
562  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 16> &x) const {
563  store<8, T, Space, DecorateAddress>(dst, x.lo());
564  store<8, T, Space, DecorateAddress>(dst + 8 * get_max_local_range()[0],
565  x.hi());
566  }
567 
568 #endif // __NVPTX__
569 #else // __SYCL_DEVICE_ONLY__
570  template <int N, typename T, access::address_space Space,
571  access::decorated DecorateAddress>
573  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
575  (void)dst;
576  (void)x;
577  throw runtime_error("Sub-groups are not supported on host device.",
578  PI_ERROR_INVALID_DEVICE);
579  }
580 #endif // __SYCL_DEVICE_ONLY__
581 
582  template <int N, typename T, access::address_space Space,
583  access::decorated DecorateAddress>
585  sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value>
587 #ifdef __SYCL_DEVICE_ONLY__
588  for (int i = 0; i < N; ++i) {
589  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
590  }
591 #else
592  (void)dst;
593  (void)x;
594  throw runtime_error("Sub-groups are not supported on host device.",
595  PI_ERROR_INVALID_DEVICE);
596 #endif
597  }
598 
599  /* --- synchronization functions --- */
600  void barrier() const {
601 #ifdef __SYCL_DEVICE_ONLY__
608 #else
609  throw runtime_error("Sub-groups are not supported on host device.",
610  PI_ERROR_INVALID_DEVICE);
611 #endif
612  }
613 
614  __SYCL_DEPRECATED("Sub-group barrier accepting fence_space is deprecated."
615  "Use barrier() without a fence_space instead.")
616  void barrier(access::fence_space accessSpace) const {
617 #ifdef __SYCL_DEVICE_ONLY__
618  int32_t flags = sycl::detail::getSPIRVMemorySemanticsMask(accessSpace);
620  flags);
621 #else
622  (void)accessSpace;
623  throw runtime_error("Sub-groups are not supported on host device.",
624  PI_ERROR_INVALID_DEVICE);
625 #endif
626  }
627 
628  /* --- deprecated collective functions --- */
629  template <typename T>
630  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
631  "sycl::ext::oneapi::broadcast instead.")
632  EnableIfIsScalarArithmetic<T> broadcast(T x, id<1> local_id) const {
633 #ifdef __SYCL_DEVICE_ONLY__
634  return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
635 #else
636  (void)x;
637  (void)local_id;
638  throw runtime_error("Sub-groups are not supported on host device.",
639  PI_ERROR_INVALID_DEVICE);
640 #endif
641  }
642 
643  template <typename T, class BinaryOperation>
644  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
645  "sycl::ext::oneapi::reduce instead.")
646  EnableIfIsScalarArithmetic<T> reduce(T x, BinaryOperation op) const {
647 #ifdef __SYCL_DEVICE_ONLY__
648  return sycl::detail::calc<T, __spv::GroupOperation::Reduce,
650  typename sycl::detail::GroupOpTag<T>::type(), x, op);
651 #else
652  (void)x;
653  (void)op;
654  throw runtime_error("Sub-groups are not supported on host device.",
655  PI_ERROR_INVALID_DEVICE);
656 #endif
657  }
658 
659  template <typename T, class BinaryOperation>
660  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
661  "sycl::ext::oneapi::reduce instead.")
662  EnableIfIsScalarArithmetic<T> reduce(T x, T init, BinaryOperation op) const {
663 #ifdef __SYCL_DEVICE_ONLY__
664  return op(init, reduce(x, op));
665 #else
666  (void)x;
667  (void)init;
668  (void)op;
669  throw runtime_error("Sub-groups are not supported on host device.",
670  PI_ERROR_INVALID_DEVICE);
671 #endif
672  }
673 
674  template <typename T, class BinaryOperation>
675  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
676  "sycl::ext::oneapi::exclusive_scan instead.")
677  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, BinaryOperation op) const {
678 #ifdef __SYCL_DEVICE_ONLY__
679  return sycl::detail::calc<T, __spv::GroupOperation::ExclusiveScan,
681  typename sycl::detail::GroupOpTag<T>::type(), x, op);
682 #else
683  (void)x;
684  (void)op;
685  throw runtime_error("Sub-groups are not supported on host device.",
686  PI_ERROR_INVALID_DEVICE);
687 #endif
688  }
689 
690  template <typename T, class BinaryOperation>
691  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
692  "sycl::ext::oneapi::exclusive_scan instead.")
693  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, T init,
694  BinaryOperation op) const {
695 #ifdef __SYCL_DEVICE_ONLY__
696  if (get_local_id().get(0) == 0) {
697  x = op(init, x);
698  }
699  T scan = exclusive_scan(x, op);
700  if (get_local_id().get(0) == 0) {
701  scan = init;
702  }
703  return scan;
704 #else
705  (void)x;
706  (void)init;
707  (void)op;
708  throw runtime_error("Sub-groups are not supported on host device.",
709  PI_ERROR_INVALID_DEVICE);
710 #endif
711  }
712 
713  template <typename T, class BinaryOperation>
714  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
715  "sycl::ext::oneapi::inclusive_scan instead.")
716  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op) const {
717 #ifdef __SYCL_DEVICE_ONLY__
718  return sycl::detail::calc<T, __spv::GroupOperation::InclusiveScan,
720  typename sycl::detail::GroupOpTag<T>::type(), x, op);
721 #else
722  (void)x;
723  (void)op;
724  throw runtime_error("Sub-groups are not supported on host device.",
725  PI_ERROR_INVALID_DEVICE);
726 #endif
727  }
728 
729  template <typename T, class BinaryOperation>
730  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
731  "sycl::ext::oneapi::inclusive_scan instead.")
732  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op,
733  T init) const {
734 #ifdef __SYCL_DEVICE_ONLY__
735  if (get_local_id().get(0) == 0) {
736  x = op(init, x);
737  }
738  return inclusive_scan(x, op);
739 #else
740  (void)x;
741  (void)op;
742  (void)init;
743  throw runtime_error("Sub-groups are not supported on host device.",
744  PI_ERROR_INVALID_DEVICE);
745 #endif
746  }
747 
748  linear_id_type get_group_linear_range() const {
749 #ifdef __SYCL_DEVICE_ONLY__
750  return static_cast<linear_id_type>(get_group_range()[0]);
751 #else
752  throw runtime_error("Sub-groups are not supported on host device.",
753  PI_ERROR_INVALID_DEVICE);
754 #endif
755  }
756 
757  linear_id_type get_local_linear_range() const {
758 #ifdef __SYCL_DEVICE_ONLY__
759  return static_cast<linear_id_type>(get_local_range()[0]);
760 #else
761  throw runtime_error("Sub-groups are not supported on host device.",
762  PI_ERROR_INVALID_DEVICE);
763 #endif
764  }
765 
766  bool leader() const {
767 #ifdef __SYCL_DEVICE_ONLY__
768  return get_local_linear_id() == 0;
769 #else
770  throw runtime_error("Sub-groups are not supported on host device.",
771  PI_ERROR_INVALID_DEVICE);
772 #endif
773  }
774 
775 protected:
776  template <int dimensions> friend class sycl::nd_item;
777  friend sub_group this_sub_group();
779  sub_group() = default;
780 };
781 
783  "use sycl::ext::oneapi::experimental::this_sub_group() instead")
784 inline sub_group this_sub_group() {
785 #ifdef __SYCL_DEVICE_ONLY__
786  return sub_group();
787 #else
788  throw runtime_error("Sub-groups are not supported on host device.",
789  PI_ERROR_INVALID_DEVICE);
790 #endif
791 }
792 
793 } // namespace ext::oneapi
794 
795 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
796 } // namespace sycl
Provides constructors for address space qualified and non address space qualified pointers to allow i...
Definition: multi_ptr.hpp:78
std::conditional_t< is_decorated, decorated_type *, std::add_pointer_t< value_type > > pointer
Definition: multi_ptr.hpp:90
pointer get() const
Definition: multi_ptr.hpp:247
decorated_type * get_decorated() const
Definition: multi_ptr.hpp:248
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
Definition: types.hpp:558
#define __SYCL_INLINE_VER_NAMESPACE(X)
#define __SYCL_DEPRECATED(message)
__ESIMD_API void barrier()
Generic work-group barrier.
Definition: memory.hpp:1109
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
select_cl_scalar_integral_unsigned_t< T > SelectBlockT
Definition: sub_group.hpp:39
bool_constant<!std::is_same< void, SelectBlockT< T > >::value &&Space==access::address_space::local_space > AcceptableForLocalLoadStore
Definition: sub_group.hpp:49
bool_constant<!std::is_same< void, SelectBlockT< T > >::value &&Space==access::address_space::global_space > AcceptableForGlobalLoadStore
Definition: sub_group.hpp:44
conditional_t< TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::type, conditional_t< TryToGetPointerT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetPointerVecT< SelectMatchingOpenCLType_t< T > >::type, SelectMatchingOpenCLType_t< T > >> ConvertToOpenCLType_t
select_apply_cl_scalar_t< T, sycl::cl_uchar, sycl::cl_ushort, sycl::cl_uint, sycl::cl_ulong > select_cl_scalar_integral_unsigned_t
std::integral_constant< bool, V > bool_constant
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:199
multi_ptr< T, Space, IsDecorated > GetUnqualMultiPtr(const multi_ptr< CVT, Space, IsDecorated > &Mptr)
Definition: sub_group.hpp:110
typename std::enable_if< B, T >::type enable_if_t
size_t get_local_linear_range(Group g)
Group::linear_id_type get_local_linear_id(Group g)
sycl::detail::enable_if_t< sycl::detail::is_scalar_arithmetic< T >::value, T > EnableIfIsScalarArithmetic
typename remove_decoration< T >::type remove_decoration_t
Definition: access.hpp:302
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
_Tp reduce(const simd< _Tp, _Abi > &, _BinaryOp=_BinaryOp())
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:323
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:574
T shuffle(T x, id_type local_id) const
Definition: sub_group.hpp:212
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:303
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:421
linear_id_type get_group_linear_id() const
Definition: sub_group.hpp:187
range_type get_group_range() const
Definition: sub_group.hpp:196
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > src) const
Definition: sub_group.hpp:409
T shuffle_up(T x, uint32_t delta) const
Definition: sub_group.hpp:234
void store(T *dst, const T &x) const
Definition: sub_group.hpp:474
range_type get_local_range() const
Definition: sub_group.hpp:160
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:586
T shuffle_down(T x, uint32_t delta) const
Definition: sub_group.hpp:223
range_type get_max_local_range() const
Definition: sub_group.hpp:169
linear_id_type get_local_linear_id() const
Definition: sub_group.hpp:151
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
Definition: sub_group.hpp:486
std::enable_if_t< sycl::detail::is_scalar_arithmetic< T >::value, T > EnableIfIsScalarArithmetic
Definition: sub_group.hpp:207
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
Definition: sub_group.hpp:505
T shuffle_xor(T x, id_type value) const
Definition: sub_group.hpp:245