DPC++ Runtime
Runtime libraries for oneAPI DPC++
sub_group.hpp
Go to the documentation of this file.
1 //==----------- sub_group.hpp --- SYCL sub-group ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_ops.hpp>
13 #include <sycl/access/access.hpp>
14 #include <sycl/detail/defines.hpp>
16 #include <sycl/detail/helpers.hpp>
17 #include <sycl/detail/spirv.hpp>
20 #include <sycl/id.hpp>
21 #include <sycl/memory_enums.hpp>
22 #include <sycl/range.hpp>
23 #include <sycl/types.hpp>
24 
25 #include <type_traits>
26 
27 namespace sycl {
29 template <typename T, access::address_space Space,
30  access::decorated DecorateAddress>
31 class multi_ptr;
32 
33 namespace detail {
34 
35 namespace sub_group {
36 
37 // Selects 8, 16, 32, or 64-bit type depending on size of scalar type T.
38 template <typename T>
40 
41 template <typename T, access::address_space Space>
43  std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
44  Space == access::address_space::global_space>;
45 
46 template <typename T, access::address_space Space>
48  std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
49  Space == access::address_space::local_space>;
50 
51 #ifdef __SYCL_DEVICE_ONLY__
52 template <typename T, access::address_space Space,
53  access::decorated DecorateAddress>
54 T load(const multi_ptr<T, Space, DecorateAddress> src) {
55  using BlockT = SelectBlockT<T>;
58 
59  BlockT Ret =
60  __spirv_SubgroupBlockReadINTEL<BlockT>(reinterpret_cast<PtrT>(src.get()));
61 
62  return sycl::bit_cast<T>(Ret);
63 }
64 
65 template <int N, typename T, access::address_space Space,
66  access::decorated DecorateAddress>
68  using BlockT = SelectBlockT<T>;
69  using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
72 
73  VecT Ret =
74  __spirv_SubgroupBlockReadINTEL<VecT>(reinterpret_cast<PtrT>(src.get()));
75 
76  return sycl::bit_cast<typename vec<T, N>::vector_t>(Ret);
77 }
78 
79 template <typename T, access::address_space Space,
80  access::decorated DecorateAddress>
81 void store(multi_ptr<T, Space, DecorateAddress> dst, const T &x) {
82  using BlockT = SelectBlockT<T>;
85 
86  __spirv_SubgroupBlockWriteINTEL(reinterpret_cast<PtrT>(dst.get()),
87  sycl::bit_cast<BlockT>(x));
88 }
89 
90 template <int N, typename T, access::address_space Space,
91  access::decorated DecorateAddress>
92 void store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) {
93  using BlockT = SelectBlockT<T>;
94  using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
96  const multi_ptr<BlockT, Space, DecorateAddress>>;
97 
98  __spirv_SubgroupBlockWriteINTEL(reinterpret_cast<PtrT>(dst.get()),
99  sycl::bit_cast<VecT>(x));
100 }
101 #endif // __SYCL_DEVICE_ONLY__
102 
103 } // namespace sub_group
104 
105 // Helper for removing const and volatile qualifiers from the element type of
106 // a multi_ptr.
107 template <typename CVT, access::address_space Space,
108  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
109 inline multi_ptr<T, Space, IsDecorated>
111  if constexpr (IsDecorated == access::decorated::legacy) {
113  const_cast<typename multi_ptr<T, Space, IsDecorated>::pointer_t>(
114  Mptr.get())};
115  } else {
117  const_cast<typename multi_ptr<T, Space, IsDecorated>::pointer>(
118  Mptr.get_decorated())};
119  }
120 }
121 
122 } // namespace detail
123 
124 namespace ext::oneapi {
125 
126 struct sub_group;
127 namespace experimental {
128 inline sub_group this_sub_group();
129 }
130 
131 struct sub_group {
132 
133  using id_type = id<1>;
135  using linear_id_type = uint32_t;
136  static constexpr int dimensions = 1;
137  static constexpr sycl::memory_scope fence_scope =
138  sycl::memory_scope::sub_group;
139 
140  /* --- common interface members --- */
141 
143 #ifdef __SYCL_DEVICE_ONLY__
144  return __spirv_SubgroupLocalInvocationId();
145 #else
146  throw runtime_error("Sub-groups are not supported on host device.",
147  PI_ERROR_INVALID_DEVICE);
148 #endif
149  }
150 
152 #ifdef __SYCL_DEVICE_ONLY__
153  return static_cast<linear_id_type>(get_local_id()[0]);
154 #else
155  throw runtime_error("Sub-groups are not supported on host device.",
156  PI_ERROR_INVALID_DEVICE);
157 #endif
158  }
159 
161 #ifdef __SYCL_DEVICE_ONLY__
162  return __spirv_SubgroupSize();
163 #else
164  throw runtime_error("Sub-groups are not supported on host device.",
165  PI_ERROR_INVALID_DEVICE);
166 #endif
167  }
168 
170 #ifdef __SYCL_DEVICE_ONLY__
171  return __spirv_SubgroupMaxSize();
172 #else
173  throw runtime_error("Sub-groups are not supported on host device.",
174  PI_ERROR_INVALID_DEVICE);
175 #endif
176  }
177 
179 #ifdef __SYCL_DEVICE_ONLY__
180  return __spirv_SubgroupId();
181 #else
182  throw runtime_error("Sub-groups are not supported on host device.",
183  PI_ERROR_INVALID_DEVICE);
184 #endif
185  }
186 
188 #ifdef __SYCL_DEVICE_ONLY__
189  return static_cast<linear_id_type>(get_group_id()[0]);
190 #else
191  throw runtime_error("Sub-groups are not supported on host device.",
192  PI_ERROR_INVALID_DEVICE);
193 #endif
194  }
195 
197 #ifdef __SYCL_DEVICE_ONLY__
198  return __spirv_NumSubgroups();
199 #else
200  throw runtime_error("Sub-groups are not supported on host device.",
201  PI_ERROR_INVALID_DEVICE);
202 #endif
203  }
204 
205  template <typename T>
206  using EnableIfIsScalarArithmetic =
207  std::enable_if_t<sycl::detail::is_scalar_arithmetic<T>::value, T>;
208 
209  /* --- one-input shuffles --- */
210  /* indices in [0 , sub_group size) */
211 
212  template <typename T> T shuffle(T x, id_type local_id) const {
213 #ifdef __SYCL_DEVICE_ONLY__
214  return sycl::detail::spirv::SubgroupShuffle(x, local_id);
215 #else
216  (void)x;
217  (void)local_id;
218  throw runtime_error("Sub-groups are not supported on host device.",
219  PI_ERROR_INVALID_DEVICE);
220 #endif
221  }
222 
223  template <typename T> T shuffle_down(T x, uint32_t delta) const {
224 #ifdef __SYCL_DEVICE_ONLY__
225  return sycl::detail::spirv::SubgroupShuffleDown(x, delta);
226 #else
227  (void)x;
228  (void)delta;
229  throw runtime_error("Sub-groups are not supported on host device.",
230  PI_ERROR_INVALID_DEVICE);
231 #endif
232  }
233 
234  template <typename T> T shuffle_up(T x, uint32_t delta) const {
235 #ifdef __SYCL_DEVICE_ONLY__
236  return sycl::detail::spirv::SubgroupShuffleUp(x, delta);
237 #else
238  (void)x;
239  (void)delta;
240  throw runtime_error("Sub-groups are not supported on host device.",
241  PI_ERROR_INVALID_DEVICE);
242 #endif
243  }
244 
245  template <typename T> T shuffle_xor(T x, id_type value) const {
246 #ifdef __SYCL_DEVICE_ONLY__
247  return sycl::detail::spirv::SubgroupShuffleXor(x, value);
248 #else
249  (void)x;
250  (void)value;
251  throw runtime_error("Sub-groups are not supported on host device.",
252  PI_ERROR_INVALID_DEVICE);
253 #endif
254  }
255 
256  /* --- sub_group load/stores --- */
257  /* these can map to SIMD or block read/write hardware where available */
258 #ifdef __SYCL_DEVICE_ONLY__
259  // Method for decorated pointer
260  template <typename CVT, typename T = std::remove_cv_t<CVT>>
261  std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value, T>
262  load(CVT *cv_src) const {
263  T *src = const_cast<T *>(cv_src);
264  return load(sycl::multi_ptr<remove_decoration_t<T>,
265  sycl::detail::deduce_AS<T>::value,
266  sycl::access::decorated::yes>(src));
267  }
268 
269  // Method for raw pointer
270  template <typename CVT, typename T = std::remove_cv_t<CVT>>
271  std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value, T>
272  load(CVT *cv_src) const {
273  T *src = const_cast<T *>(cv_src);
274 
275 #ifdef __NVPTX__
276  return src[get_local_id()[0]];
277 #else // __NVPTX__
278  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(src);
279  if (l)
280  return load(l);
281 
282  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(src);
283  if (g)
284  return load(g);
285 
286  assert(!"Sub-group load() is supported for local or global pointers only.");
287  return {};
288 #endif // __NVPTX__
289  }
290 #else //__SYCL_DEVICE_ONLY__
291  template <typename CVT, typename T = std::remove_cv_t<CVT>>
292  T load(CVT *src) const {
293  (void)src;
294  throw runtime_error("Sub-groups are not supported on host device.",
295  PI_ERROR_INVALID_DEVICE);
296  }
297 #endif //__SYCL_DEVICE_ONLY__
298 
299  template <typename CVT, access::address_space Space,
300  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
301  std::enable_if_t<
302  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value, T>
306 #ifdef __SYCL_DEVICE_ONLY__
307 #ifdef __NVPTX__
308  return src.get()[get_local_id()[0]];
309 #else
310  return sycl::detail::sub_group::load(src);
311 #endif // __NVPTX__
312 #else
313  (void)src;
314  throw runtime_error("Sub-groups are not supported on host device.",
315  PI_ERROR_INVALID_DEVICE);
316 #endif
317  }
318 
319  template <typename CVT, access::address_space Space,
320  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
321  std::enable_if_t<
322  sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value, T>
326 #ifdef __SYCL_DEVICE_ONLY__
327  return src.get()[get_local_id()[0]];
328 #else
329  (void)src;
330  throw runtime_error("Sub-groups are not supported on host device.",
331  PI_ERROR_INVALID_DEVICE);
332 #endif
333  }
334 #ifdef __SYCL_DEVICE_ONLY__
335 #ifdef __NVPTX__
336  template <int N, typename CVT, access::address_space Space,
337  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
338  std::enable_if_t<
339  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
340  vec<T, N>>
341  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
344  vec<T, N> res;
345  for (int i = 0; i < N; ++i) {
346  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
347  }
348  return res;
349  }
350 #else // __NVPTX__
351  template <int N, typename CVT, access::address_space Space,
352  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
353  std::enable_if_t<
354  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
355  N != 1 && N != 3 && N != 16,
356  vec<T, N>>
357  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
360  return sycl::detail::sub_group::load<N, T>(src);
361  }
362 
363  template <int N, typename CVT, access::address_space Space,
364  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
365  std::enable_if_t<
366  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
367  N == 16,
368  vec<T, 16>>
369  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
370  multi_ptr<T, Space, IsDecorated> src =
372  return {sycl::detail::sub_group::load<8, T>(src),
373  sycl::detail::sub_group::load<8, T>(src +
374  8 * get_max_local_range()[0])};
375  }
376 
377  template <int N, typename CVT, access::address_space Space,
378  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
379  std::enable_if_t<
380  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
381  N == 3,
382  vec<T, 3>>
383  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
384  multi_ptr<T, Space, IsDecorated> src =
386  return {
387  sycl::detail::sub_group::load<1, T>(src),
388  sycl::detail::sub_group::load<2, T>(src + get_max_local_range()[0])};
389  }
390 
391  template <int N, typename CVT, access::address_space Space,
392  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
393  std::enable_if_t<
394  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
395  N == 1,
396  vec<T, 1>>
397  load(const multi_ptr<CVT, Space, IsDecorated> cv_src) const {
398  multi_ptr<T, Space, IsDecorated> src =
400  return sycl::detail::sub_group::load(src);
401  }
402 #endif // ___NVPTX___
403 #else // __SYCL_DEVICE_ONLY__
404  template <int N, typename CVT, access::address_space Space,
405  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
406  std::enable_if_t<
407  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
408  vec<T, N>>
410  (void)src;
411  throw runtime_error("Sub-groups are not supported on host device.",
412  PI_ERROR_INVALID_DEVICE);
413  }
414 #endif // __SYCL_DEVICE_ONLY__
415 
416  template <int N, typename CVT, access::address_space Space,
417  access::decorated IsDecorated, typename T = std::remove_cv_t<CVT>>
418  std::enable_if_t<
419  sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value,
420  vec<T, N>>
424 #ifdef __SYCL_DEVICE_ONLY__
425  vec<T, N> res;
426  for (int i = 0; i < N; ++i) {
427  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
428  }
429  return res;
430 #else
431  (void)src;
432  throw runtime_error("Sub-groups are not supported on host device.",
433  PI_ERROR_INVALID_DEVICE);
434 #endif
435  }
436 
437 #ifdef __SYCL_DEVICE_ONLY__
438  // Method for decorated pointer
439  template <typename T>
440  std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value>
441  store(T *dst, const remove_decoration_t<T> &x) const {
442  store(sycl::multi_ptr<remove_decoration_t<T>,
443  sycl::detail::deduce_AS<T>::value,
444  sycl::access::decorated::yes>(dst),
445  x);
446  }
447 
448  // Method for raw pointer
449  template <typename T>
450  std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value>
451  store(T *dst, const remove_decoration_t<T> &x) const {
452 
453 #ifdef __NVPTX__
454  dst[get_local_id()[0]] = x;
455 #else // __NVPTX__
456  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(dst);
457  if (l) {
458  store(l, x);
459  return;
460  }
461 
462  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(dst);
463  if (g) {
464  store(g, x);
465  return;
466  }
467 
468  assert(
469  !"Sub-group store() is supported for local or global pointers only.");
470  return;
471 #endif // __NVPTX__
472  }
473 #else //__SYCL_DEVICE_ONLY__
474  template <typename T> void store(T *dst, const T &x) const {
475  (void)dst;
476  (void)x;
477  throw runtime_error("Sub-groups are not supported on host device.",
478  PI_ERROR_INVALID_DEVICE);
479  }
480 #endif //__SYCL_DEVICE_ONLY__
481 
482  template <typename T, access::address_space Space,
483  access::decorated DecorateAddress>
484  std::enable_if_t<
485  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
487 #ifdef __SYCL_DEVICE_ONLY__
488 #ifdef __NVPTX__
489  dst.get()[get_local_id()[0]] = x;
490 #else
491  sycl::detail::sub_group::store(dst, x);
492 #endif // __NVPTX__
493 #else
494  (void)dst;
495  (void)x;
496  throw runtime_error("Sub-groups are not supported on host device.",
497  PI_ERROR_INVALID_DEVICE);
498 #endif
499  }
500 
501  template <typename T, access::address_space Space,
502  access::decorated DecorateAddress>
503  std::enable_if_t<
504  sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value>
506 #ifdef __SYCL_DEVICE_ONLY__
507  dst.get()[get_local_id()[0]] = x;
508 #else
509  (void)dst;
510  (void)x;
511  throw runtime_error("Sub-groups are not supported on host device.",
512  PI_ERROR_INVALID_DEVICE);
513 #endif
514  }
515 
516 #ifdef __SYCL_DEVICE_ONLY__
517 #ifdef __NVPTX__
518  template <int N, typename T, access::address_space Space,
519  access::decorated DecorateAddress>
520  std::enable_if_t<
521  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
522  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) const {
523  for (int i = 0; i < N; ++i) {
524  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
525  }
526  }
527 #else // __NVPTX__
528  template <int N, typename T, access::address_space Space,
529  access::decorated DecorateAddress>
530  std::enable_if_t<
531  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
532  N != 1 && N != 3 && N != 16>
533  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, N> &x) const {
534  sycl::detail::sub_group::store(dst, x);
535  }
536 
537  template <int N, typename T, access::address_space Space,
538  access::decorated DecorateAddress>
539  std::enable_if_t<
540  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
541  N == 1>
542  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 1> &x) const {
543  sycl::detail::sub_group::store(dst, x);
544  }
545 
546  template <int N, typename T, access::address_space Space,
547  access::decorated DecorateAddress>
548  std::enable_if_t<
549  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
550  N == 3>
551  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 3> &x) const {
552  store<1, T, Space, DecorateAddress>(dst, x.s0());
553  store<2, T, Space, DecorateAddress>(dst + get_max_local_range()[0],
554  {x.s1(), x.s2()});
555  }
556 
557  template <int N, typename T, access::address_space Space,
558  access::decorated DecorateAddress>
559  std::enable_if_t<
560  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
561  N == 16>
562  store(multi_ptr<T, Space, DecorateAddress> dst, const vec<T, 16> &x) const {
563  store<8, T, Space, DecorateAddress>(dst, x.lo());
564  store<8, T, Space, DecorateAddress>(dst + 8 * get_max_local_range()[0],
565  x.hi());
566  }
567 
568 #endif // __NVPTX__
569 #else // __SYCL_DEVICE_ONLY__
570  template <int N, typename T, access::address_space Space,
571  access::decorated DecorateAddress>
572  std::enable_if_t<
573  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
575  (void)dst;
576  (void)x;
577  throw runtime_error("Sub-groups are not supported on host device.",
578  PI_ERROR_INVALID_DEVICE);
579  }
580 #endif // __SYCL_DEVICE_ONLY__
581 
582  template <int N, typename T, access::address_space Space,
583  access::decorated DecorateAddress>
584  std::enable_if_t<
585  sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value>
587 #ifdef __SYCL_DEVICE_ONLY__
588  for (int i = 0; i < N; ++i) {
589  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
590  }
591 #else
592  (void)dst;
593  (void)x;
594  throw runtime_error("Sub-groups are not supported on host device.",
595  PI_ERROR_INVALID_DEVICE);
596 #endif
597  }
598 
599  /* --- synchronization functions --- */
600  void barrier() const {
601 #ifdef __SYCL_DEVICE_ONLY__
608 #else
609  throw runtime_error("Sub-groups are not supported on host device.",
610  PI_ERROR_INVALID_DEVICE);
611 #endif
612  }
613 
614  __SYCL_DEPRECATED("Sub-group barrier accepting fence_space is deprecated."
615  "Use barrier() without a fence_space instead.")
616  void barrier(access::fence_space accessSpace) const {
617 #ifdef __SYCL_DEVICE_ONLY__
618  int32_t flags = sycl::detail::getSPIRVMemorySemanticsMask(accessSpace);
620  flags);
621 #else
622  (void)accessSpace;
623  throw runtime_error("Sub-groups are not supported on host device.",
624  PI_ERROR_INVALID_DEVICE);
625 #endif
626  }
627 
628  /* --- deprecated collective functions --- */
629  template <typename T>
630  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
631  "sycl::ext::oneapi::broadcast instead.")
632  EnableIfIsScalarArithmetic<T> broadcast(T x, id<1> local_id) const {
633 #ifdef __SYCL_DEVICE_ONLY__
634  return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
635 #else
636  (void)x;
637  (void)local_id;
638  throw runtime_error("Sub-groups are not supported on host device.",
639  PI_ERROR_INVALID_DEVICE);
640 #endif
641  }
642 
643  template <typename T, class BinaryOperation>
644  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
645  "sycl::ext::oneapi::reduce instead.")
646  EnableIfIsScalarArithmetic<T> reduce(T x, BinaryOperation op) const {
647 #ifdef __SYCL_DEVICE_ONLY__
648  return sycl::detail::calc<__spv::GroupOperation::Reduce>(
649  typename sycl::detail::GroupOpTag<T>::type(), *this, x, op);
650 #else
651  (void)x;
652  (void)op;
653  throw runtime_error("Sub-groups are not supported on host device.",
654  PI_ERROR_INVALID_DEVICE);
655 #endif
656  }
657 
658  template <typename T, class BinaryOperation>
659  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
660  "sycl::ext::oneapi::reduce instead.")
661  EnableIfIsScalarArithmetic<T> reduce(T x, T init, BinaryOperation op) const {
662 #ifdef __SYCL_DEVICE_ONLY__
663  return op(init, reduce(x, op));
664 #else
665  (void)x;
666  (void)init;
667  (void)op;
668  throw runtime_error("Sub-groups are not supported on host device.",
669  PI_ERROR_INVALID_DEVICE);
670 #endif
671  }
672 
673  template <typename T, class BinaryOperation>
674  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
675  "sycl::ext::oneapi::exclusive_scan instead.")
676  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, BinaryOperation op) const {
677 #ifdef __SYCL_DEVICE_ONLY__
678  return sycl::detail::calc<__spv::GroupOperation::ExclusiveScan>(
679  typename sycl::detail::GroupOpTag<T>::type(), *this, x, op);
680 #else
681  (void)x;
682  (void)op;
683  throw runtime_error("Sub-groups are not supported on host device.",
684  PI_ERROR_INVALID_DEVICE);
685 #endif
686  }
687 
688  template <typename T, class BinaryOperation>
689  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
690  "sycl::ext::oneapi::exclusive_scan instead.")
691  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, T init,
692  BinaryOperation op) const {
693 #ifdef __SYCL_DEVICE_ONLY__
694  if (get_local_id().get(0) == 0) {
695  x = op(init, x);
696  }
697  T scan = exclusive_scan(x, op);
698  if (get_local_id().get(0) == 0) {
699  scan = init;
700  }
701  return scan;
702 #else
703  (void)x;
704  (void)init;
705  (void)op;
706  throw runtime_error("Sub-groups are not supported on host device.",
707  PI_ERROR_INVALID_DEVICE);
708 #endif
709  }
710 
711  template <typename T, class BinaryOperation>
712  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
713  "sycl::ext::oneapi::inclusive_scan instead.")
714  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op) const {
715 #ifdef __SYCL_DEVICE_ONLY__
716  return sycl::detail::calc<__spv::GroupOperation::InclusiveScan>(
717  typename sycl::detail::GroupOpTag<T>::type(), *this, x, op);
718 #else
719  (void)x;
720  (void)op;
721  throw runtime_error("Sub-groups are not supported on host device.",
722  PI_ERROR_INVALID_DEVICE);
723 #endif
724  }
725 
726  template <typename T, class BinaryOperation>
727  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
728  "sycl::ext::oneapi::inclusive_scan instead.")
729  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op,
730  T init) const {
731 #ifdef __SYCL_DEVICE_ONLY__
732  if (get_local_id().get(0) == 0) {
733  x = op(init, x);
734  }
735  return inclusive_scan(x, op);
736 #else
737  (void)x;
738  (void)op;
739  (void)init;
740  throw runtime_error("Sub-groups are not supported on host device.",
741  PI_ERROR_INVALID_DEVICE);
742 #endif
743  }
744 
745  linear_id_type get_group_linear_range() const {
746 #ifdef __SYCL_DEVICE_ONLY__
747  return static_cast<linear_id_type>(get_group_range()[0]);
748 #else
749  throw runtime_error("Sub-groups are not supported on host device.",
750  PI_ERROR_INVALID_DEVICE);
751 #endif
752  }
753 
754  linear_id_type get_local_linear_range() const {
755 #ifdef __SYCL_DEVICE_ONLY__
756  return static_cast<linear_id_type>(get_local_range()[0]);
757 #else
758  throw runtime_error("Sub-groups are not supported on host device.",
759  PI_ERROR_INVALID_DEVICE);
760 #endif
761  }
762 
763  bool leader() const {
764 #ifdef __SYCL_DEVICE_ONLY__
765  return get_local_linear_id() == 0;
766 #else
767  throw runtime_error("Sub-groups are not supported on host device.",
768  PI_ERROR_INVALID_DEVICE);
769 #endif
770  }
771 
772 protected:
773  template <int dimensions> friend class sycl::nd_item;
774  friend sub_group this_sub_group();
776  sub_group() = default;
777 };
778 
780  "use sycl::ext::oneapi::experimental::this_sub_group() instead")
781 inline sub_group this_sub_group() {
782 #ifdef __SYCL_DEVICE_ONLY__
783  return sub_group();
784 #else
785  throw runtime_error("Sub-groups are not supported on host device.",
786  PI_ERROR_INVALID_DEVICE);
787 #endif
788 }
789 
790 } // namespace ext::oneapi
791 
792 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
793 } // namespace sycl
spirv_ops.hpp
__spv::MemorySemanticsMask::SubgroupMemory
@ SubgroupMemory
Definition: spirv_types.hpp:95
sycl::_V1::detail::select_cl_scalar_integral_unsigned_t
select_apply_cl_scalar_t< T, sycl::opencl::cl_uchar, sycl::opencl::cl_ushort, sycl::opencl::cl_uint, sycl::opencl::cl_ulong > select_cl_scalar_integral_unsigned_t
Definition: generic_type_traits.hpp:477
sycl::_V1::ext::oneapi::sub_group::load
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:323
sycl::_V1::ext::oneapi::sub_group::barrier
void barrier() const
Definition: sub_group.hpp:600
T
type_traits.hpp
__spirv_ControlBarrier
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
sycl::_V1::detail::sub_group::AcceptableForLocalLoadStore
std::bool_constant<!std::is_same_v< void, SelectBlockT< T > > &&Space==access::address_space::local_space > AcceptableForLocalLoadStore
Definition: sub_group.hpp:49
sycl::_V1::ext::oneapi::sub_group::linear_id_type
uint32_t linear_id_type
Definition: sub_group.hpp:135
__SYCL_INLINE_VER_NAMESPACE
#define __SYCL_INLINE_VER_NAMESPACE(X)
Definition: defines_elementary.hpp:11
sycl::_V1::detail::sub_group::AcceptableForGlobalLoadStore
std::bool_constant<!std::is_same_v< void, SelectBlockT< T > > &&Space==access::address_space::global_space > AcceptableForGlobalLoadStore
Definition: sub_group.hpp:44
types.hpp
sycl::_V1::ext::oneapi::sub_group::get_group_range
range_type get_group_range() const
Definition: sub_group.hpp:196
sycl::_V1::ext::oneapi::sub_group::store
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
Definition: sub_group.hpp:486
sycl::_V1::ext::oneapi::sub_group::shuffle_xor
T shuffle_xor(T x, id_type value) const
Definition: sub_group.hpp:245
sycl::_V1::ext::intel::esimd::atomic_op::load
@ load
helpers.hpp
spirv_vars.hpp
sycl
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
sycl::_V1::ext::oneapi::sub_group::get_local_id
id_type get_local_id() const
Definition: sub_group.hpp:142
__spv::MemorySemanticsMask::AcquireRelease
@ AcquireRelease
Definition: spirv_types.hpp:92
sycl::_V1::ext::oneapi::sub_group::store
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:574
__spv::MemorySemanticsMask::CrossWorkgroupMemory
@ CrossWorkgroupMemory
Definition: spirv_types.hpp:97
sycl::_V1::ext::oneapi::sub_group::shuffle_down
T shuffle_down(T x, uint32_t delta) const
Definition: sub_group.hpp:223
access.hpp
sycl::_V1::id< 1 >
sycl::_V1::access::fence_space
fence_space
Definition: access.hpp:39
id.hpp
sycl::_V1::remove_decoration_t
typename remove_decoration< T >::type remove_decoration_t
Definition: access.hpp:311
__SYCL_DEPRECATED
#define __SYCL_DEPRECATED(message)
Definition: defines_elementary.hpp:46
sycl::_V1::ext::oneapi::sub_group::store
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
Definition: sub_group.hpp:505
sycl::_V1::multi_ptr::get
pointer get() const
Definition: multi_ptr.hpp:244
sycl::_V1::ext::oneapi::sub_group::load
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:421
sycl::_V1::range< 1 >
sycl::_V1::ext::intel::esimd::barrier
__ESIMD_API void barrier()
Generic work-group barrier.
Definition: memory.hpp:1641
sycl::_V1::multi_ptr::get_decorated
decorated_type * get_decorated() const
Definition: multi_ptr.hpp:245
sycl::_V1::ext::intel::experimental::esimd::bfn_t::x
@ x
sycl::_V1::multi_ptr
Provides constructors for address space qualified and non address space qualified pointers to allow i...
Definition: atomic.hpp:34
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
sycl::_V1::ext::oneapi::sub_group::get_group_id
id_type get_group_id() const
Definition: sub_group.hpp:178
sycl::_V1::detail::get_local_linear_range
size_t get_local_linear_range(Group g)
__spv::Scope::Subgroup
@ Subgroup
Definition: spirv_types.hpp:35
generic_type_traits.hpp
range.hpp
sycl::_V1::ext::oneapi::sub_group::store
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:586
sycl::_V1::ext::oneapi::sub_group::store
void store(T *dst, const T &x) const
Definition: sub_group.hpp:474
sycl::_V1::detail::sub_group::SelectBlockT
select_cl_scalar_integral_unsigned_t< T > SelectBlockT
Definition: sub_group.hpp:39
spirv.hpp
defines.hpp
sycl::_V1::ext::oneapi::experimental::this_sub_group
sub_group this_sub_group()
Definition: sub_group.hpp:21
sycl::_V1::access::decorated
decorated
Definition: access.hpp:59
sycl::_V1::multi_ptr::pointer
std::conditional_t< is_decorated, decorated_type *, std::add_pointer_t< value_type > > pointer
Definition: multi_ptr.hpp:90
sycl::_V1::detail::get_local_linear_id
Group::linear_id_type get_local_linear_id(Group g)
__spv::MemorySemanticsMask::WorkgroupMemory
@ WorkgroupMemory
Definition: spirv_types.hpp:96
sycl::_V1::ext::oneapi::sub_group::get_max_local_range
range_type get_max_local_range() const
Definition: sub_group.hpp:169
functional.hpp
sycl::_V1::ext::oneapi::sub_group::shuffle
T shuffle(T x, id_type local_id) const
Definition: sub_group.hpp:212
sycl::_V1::memory_scope
memory_scope
Definition: memory_enums.hpp:26
sycl::_V1::vec
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
Definition: aliases.hpp:20
sycl::_V1::ext::intel::esimd::atomic_op::store
@ store
reduce
_Tp reduce(const simd< _Tp, _Abi > &, _BinaryOp=_BinaryOp())
sycl::_V1::ext::oneapi::sub_group::load
T load(CVT *src) const
Definition: sub_group.hpp:292
memory_enums.hpp
sycl::_V1::ext::oneapi::sub_group
Definition: sub_group.hpp:131
sycl::_V1::detail::getSPIRVMemorySemanticsMask
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:191
sycl::_V1::detail::GetUnqualMultiPtr
multi_ptr< T, Space, IsDecorated > GetUnqualMultiPtr(const multi_ptr< CVT, Space, IsDecorated > &Mptr)
Definition: sub_group.hpp:110
sycl::_V1::detail::ConvertToOpenCLType_t
ConvertToOpenCLTypeImpl_t< SelectMatchingOpenCLType_t< T > > ConvertToOpenCLType_t
Definition: generic_type_traits.hpp:614
sycl::_V1::ext::oneapi::sub_group::load
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
Definition: sub_group.hpp:303
sycl::_V1::ext::oneapi::sub_group::get_local_range
range_type get_local_range() const
Definition: sub_group.hpp:160
sycl::_V1::ext::oneapi::sub_group::load
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > src) const
Definition: sub_group.hpp:409
sycl::_V1::ext::oneapi::sub_group::shuffle_up
T shuffle_up(T x, uint32_t delta) const
Definition: sub_group.hpp:234
sycl::_V1::ext::oneapi::sub_group::get_local_linear_id
linear_id_type get_local_linear_id() const
Definition: sub_group.hpp:151
sycl::_V1::Space
Space
Definition: multi_ptr.hpp:1307
sycl::_V1::ext::oneapi::experimental::matrix::scope_t::sub_group
@ sub_group
sycl::_V1::access::address_space
address_space
Definition: access.hpp:47
sycl::_V1::ext::oneapi::sub_group::get_group_linear_id
linear_id_type get_group_linear_id() const
Definition: sub_group.hpp:187