DPC++ Runtime
Runtime libraries for oneAPI DPC++
sub_group.hpp
Go to the documentation of this file.
1 //==----------- sub_group.hpp --- SYCL sub-group ---------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <CL/__spirv/spirv_ops.hpp>
17 #include <CL/sycl/detail/spirv.hpp>
19 #include <CL/sycl/id.hpp>
20 #include <CL/sycl/memory_enums.hpp>
21 #include <CL/sycl/range.hpp>
22 #include <CL/sycl/types.hpp>
24 
25 #include <type_traits>
26 
28 namespace sycl {
29 template <typename T, access::address_space Space> class multi_ptr;
30 
31 namespace detail {
32 
33 namespace sub_group {
34 
35 // Selects 8, 16, 32, or 64-bit type depending on size of scalar type T.
36 template <typename T>
38 
39 template <typename T, access::address_space Space>
42  Space == access::address_space::global_space>;
43 
44 template <typename T, access::address_space Space>
47  Space == access::address_space::local_space>;
48 
49 #ifdef __SYCL_DEVICE_ONLY__
50 template <typename T, access::address_space Space>
51 T load(const multi_ptr<T, Space> src) {
52  using BlockT = SelectBlockT<T>;
53  using PtrT =
55 
56  BlockT Ret =
57  __spirv_SubgroupBlockReadINTEL<BlockT>(reinterpret_cast<PtrT>(src.get()));
58 
59  return sycl::bit_cast<T>(Ret);
60 }
61 
62 template <int N, typename T, access::address_space Space>
63 vec<T, N> load(const multi_ptr<T, Space> src) {
64  using BlockT = SelectBlockT<T>;
66  using PtrT =
68 
69  VecT Ret =
70  __spirv_SubgroupBlockReadINTEL<VecT>(reinterpret_cast<PtrT>(src.get()));
71 
72  return sycl::bit_cast<typename vec<T, N>::vector_t>(Ret);
73 }
74 
75 template <typename T, access::address_space Space>
76 void store(multi_ptr<T, Space> dst, const T &x) {
77  using BlockT = SelectBlockT<T>;
79 
80  __spirv_SubgroupBlockWriteINTEL(reinterpret_cast<PtrT>(dst.get()),
81  sycl::bit_cast<BlockT>(x));
82 }
83 
84 template <int N, typename T, access::address_space Space>
85 void store(multi_ptr<T, Space> dst, const vec<T, N> &x) {
86  using BlockT = SelectBlockT<T>;
88  using PtrT =
90 
91  __spirv_SubgroupBlockWriteINTEL(reinterpret_cast<PtrT>(dst.get()),
92  sycl::bit_cast<VecT>(x));
93 }
94 #endif // __SYCL_DEVICE_ONLY__
95 
96 } // namespace sub_group
97 
98 } // namespace detail
99 
100 namespace ext {
101 namespace oneapi {
102 
103 struct sub_group;
104 namespace experimental {
105 inline sub_group this_sub_group();
106 }
107 
108 struct sub_group {
109 
110  using id_type = id<1>;
112  using linear_id_type = uint32_t;
113  static constexpr int dimensions = 1;
114  static constexpr sycl::memory_scope fence_scope =
115  sycl::memory_scope::sub_group;
116 
117  /* --- common interface members --- */
118 
120 #ifdef __SYCL_DEVICE_ONLY__
121  return __spirv_SubgroupLocalInvocationId();
122 #else
123  throw runtime_error("Sub-groups are not supported on host device.",
125 #endif
126  }
127 
129 #ifdef __SYCL_DEVICE_ONLY__
130  return static_cast<linear_id_type>(get_local_id()[0]);
131 #else
132  throw runtime_error("Sub-groups are not supported on host device.",
134 #endif
135  }
136 
138 #ifdef __SYCL_DEVICE_ONLY__
139  return __spirv_SubgroupSize();
140 #else
141  throw runtime_error("Sub-groups are not supported on host device.",
143 #endif
144  }
145 
147 #ifdef __SYCL_DEVICE_ONLY__
148  return __spirv_SubgroupMaxSize();
149 #else
150  throw runtime_error("Sub-groups are not supported on host device.",
152 #endif
153  }
154 
156 #ifdef __SYCL_DEVICE_ONLY__
157  return __spirv_SubgroupId();
158 #else
159  throw runtime_error("Sub-groups are not supported on host device.",
161 #endif
162  }
163 
165 #ifdef __SYCL_DEVICE_ONLY__
166  return static_cast<linear_id_type>(get_group_id()[0]);
167 #else
168  throw runtime_error("Sub-groups are not supported on host device.",
170 #endif
171  }
172 
174 #ifdef __SYCL_DEVICE_ONLY__
175  return __spirv_NumSubgroups();
176 #else
177  throw runtime_error("Sub-groups are not supported on host device.",
179 #endif
180  }
181 
182  template <typename T>
185  T>;
186 
187  /* --- one-input shuffles --- */
188  /* indices in [0 , sub_group size) */
189 
190  template <typename T> T shuffle(T x, id_type local_id) const {
191 #ifdef __SYCL_DEVICE_ONLY__
192  return sycl::detail::spirv::SubgroupShuffle(x, local_id);
193 #else
194  (void)x;
195  (void)local_id;
196  throw runtime_error("Sub-groups are not supported on host device.",
198 #endif
199  }
200 
201  template <typename T> T shuffle_down(T x, uint32_t delta) const {
202 #ifdef __SYCL_DEVICE_ONLY__
203  return sycl::detail::spirv::SubgroupShuffleDown(x, delta);
204 #else
205  (void)x;
206  (void)delta;
207  throw runtime_error("Sub-groups are not supported on host device.",
209 #endif
210  }
211 
212  template <typename T> T shuffle_up(T x, uint32_t delta) const {
213 #ifdef __SYCL_DEVICE_ONLY__
214  return sycl::detail::spirv::SubgroupShuffleUp(x, delta);
215 #else
216  (void)x;
217  (void)delta;
218  throw runtime_error("Sub-groups are not supported on host device.",
220 #endif
221  }
222 
223  template <typename T> T shuffle_xor(T x, id_type value) const {
224 #ifdef __SYCL_DEVICE_ONLY__
225  return sycl::detail::spirv::SubgroupShuffleXor(x, value);
226 #else
227  (void)x;
228  (void)value;
229  throw runtime_error("Sub-groups are not supported on host device.",
231 #endif
232  }
233 
234  /* --- sub_group load/stores --- */
235  /* these can map to SIMD or block read/write hardware where available */
236 #ifdef __SYCL_DEVICE_ONLY__
237  // Method for decorated pointer
238  template <typename CVT, typename T = std::remove_cv_t<CVT>>
240  !std::is_same<typename detail::remove_AS<T>::type, T>::value, T>
241  load(CVT *cv_src) const {
242  T *src = const_cast<T *>(cv_src);
243  return load(sycl::multi_ptr<typename detail::remove_AS<T>::type,
244  sycl::detail::deduce_AS<T>::value>(
245  (typename detail::remove_AS<T>::type *)src));
246  }
247 
248  // Method for raw pointer
249  template <typename CVT, typename T = std::remove_cv_t<CVT>>
251  std::is_same<typename detail::remove_AS<T>::type, T>::value, T>
252  load(CVT *cv_src) const {
253  T *src = const_cast<T *>(cv_src);
254 
255 #ifdef __NVPTX__
256  return src[get_local_id()[0]];
257 #else // __NVPTX__
258  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(src);
259  if (l)
260  return load(l);
261 
262  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(src);
263  if (g)
264  return load(g);
265 
266  assert(!"Sub-group load() is supported for local or global pointers only.");
267  return {};
268 #endif // __NVPTX__
269  }
270 #else //__SYCL_DEVICE_ONLY__
271  template <typename CVT, typename T = std::remove_cv_t<CVT>>
272  T load(CVT *src) const {
273  (void)src;
274  throw runtime_error("Sub-groups are not supported on host device.",
276  }
277 #endif //__SYCL_DEVICE_ONLY__
278 
279  template <typename CVT, access::address_space Space,
280  typename T = std::remove_cv_t<CVT>>
283  load(const multi_ptr<CVT, Space> cv_src) const {
284  multi_ptr<T, Space> src = const_cast<T *>(static_cast<CVT *>(cv_src));
285 #ifdef __SYCL_DEVICE_ONLY__
286 #ifdef __NVPTX__
287  return src.get()[get_local_id()[0]];
288 #else
289  return sycl::detail::sub_group::load(src);
290 #endif // __NVPTX__
291 #else
292  (void)src;
293  throw runtime_error("Sub-groups are not supported on host device.",
295 #endif
296  }
297 
298  template <typename CVT, access::address_space Space,
299  typename T = std::remove_cv_t<CVT>>
302  load(const multi_ptr<CVT, Space> cv_src) const {
303  multi_ptr<T, Space> src = const_cast<T *>(static_cast<CVT *>(cv_src));
304 #ifdef __SYCL_DEVICE_ONLY__
305  return src.get()[get_local_id()[0]];
306 #else
307  (void)src;
308  throw runtime_error("Sub-groups are not supported on host device.",
310 #endif
311  }
312 #ifdef __SYCL_DEVICE_ONLY__
313 #ifdef __NVPTX__
314  template <int N, typename CVT, access::address_space Space,
315  typename T = std::remove_cv_t<CVT>>
318  vec<T, N>>
319  load(const multi_ptr<CVT, Space> cv_src) const {
320  multi_ptr<T, Space> src = const_cast<T *>(static_cast<CVT *>(cv_src));
321  vec<T, N> res;
322  for (int i = 0; i < N; ++i) {
323  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
324  }
325  return res;
326  }
327 #else // __NVPTX__
328  template <int N, typename CVT, access::address_space Space,
329  typename T = std::remove_cv_t<CVT>>
332  N != 1 && N != 3 && N != 16,
333  vec<T, N>>
334  load(const multi_ptr<CVT, Space> cv_src) const {
335  multi_ptr<T, Space> src = const_cast<T *>(static_cast<CVT *>(cv_src));
336  return sycl::detail::sub_group::load<N, T>(src);
337  }
338 
339  template <int N, typename CVT, access::address_space Space,
340  typename T = std::remove_cv_t<CVT>>
343  N == 16,
344  vec<T, 16>>
345  load(const multi_ptr<CVT, Space> cv_src) const {
346  multi_ptr<T, Space> src = const_cast<T *>(static_cast<CVT *>(cv_src));
347  return {sycl::detail::sub_group::load<8, T>(src),
348  sycl::detail::sub_group::load<8, T>(src +
349  8 * get_max_local_range()[0])};
350  }
351 
352  template <int N, typename CVT, access::address_space Space,
353  typename T = std::remove_cv_t<CVT>>
355  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
356  N == 3,
357  vec<T, 3>>
358  load(const multi_ptr<CVT, Space> cv_src) const {
359  multi_ptr<T, Space> src = const_cast<T *>(static_cast<CVT *>(cv_src));
360  return {
361  sycl::detail::sub_group::load<1, T>(src),
362  sycl::detail::sub_group::load<2, T>(src + get_max_local_range()[0])};
363  }
364 
365  template <int N, typename CVT, access::address_space Space,
366  typename T = std::remove_cv_t<CVT>>
368  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
369  N == 1,
370  vec<T, 1>>
371  load(const multi_ptr<CVT, Space> cv_src) const {
372  multi_ptr<T, Space> src = const_cast<T *>(static_cast<CVT *>(cv_src));
373  return sycl::detail::sub_group::load(src);
374  }
375 #endif // ___NVPTX___
376 #else // __SYCL_DEVICE_ONLY__
377  template <int N, typename CVT, access::address_space Space,
378  typename T = std::remove_cv_t<CVT>>
380  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
381  vec<T, N>>
382  load(const multi_ptr<CVT, Space> src) const {
383  (void)src;
384  throw runtime_error("Sub-groups are not supported on host device.",
386  }
387 #endif // __SYCL_DEVICE_ONLY__
388 
389  template <int N, typename CVT, access::address_space Space,
390  typename T = std::remove_cv_t<CVT>>
393  vec<T, N>>
394  load(const multi_ptr<CVT, Space> cv_src) const {
395  multi_ptr<T, Space> src = const_cast<T *>(static_cast<CVT *>(cv_src));
396 #ifdef __SYCL_DEVICE_ONLY__
397  vec<T, N> res;
398  for (int i = 0; i < N; ++i) {
399  res[i] = *(src.get() + i * get_max_local_range()[0] + get_local_id()[0]);
400  }
401  return res;
402 #else
403  (void)src;
404  throw runtime_error("Sub-groups are not supported on host device.",
406 #endif
407  }
408 
409 #ifdef __SYCL_DEVICE_ONLY__
410  // Method for decorated pointer
411  template <typename T>
413  !std::is_same<typename detail::remove_AS<T>::type, T>::value>
414  store(T *dst, const typename detail::remove_AS<T>::type &x) const {
416  sycl::detail::deduce_AS<T>::value>(
417  (typename detail::remove_AS<T>::type *)dst),
418  x);
419  }
420 
421  // Method for raw pointer
422  template <typename T>
424  std::is_same<typename detail::remove_AS<T>::type, T>::value>
425  store(T *dst, const typename detail::remove_AS<T>::type &x) const {
426 
427 #ifdef __NVPTX__
428  dst[get_local_id()[0]] = x;
429 #else // __NVPTX__
430  auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(dst);
431  if (l) {
432  store(l, x);
433  return;
434  }
435 
436  auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(dst);
437  if (g) {
438  store(g, x);
439  return;
440  }
441 
442  assert(
443  !"Sub-group store() is supported for local or global pointers only.");
444  return;
445 #endif // __NVPTX__
446  }
447 #else //__SYCL_DEVICE_ONLY__
448  template <typename T> void store(T *dst, const T &x) const {
449  (void)dst;
450  (void)x;
451  throw runtime_error("Sub-groups are not supported on host device.",
453  }
454 #endif //__SYCL_DEVICE_ONLY__
455 
456  template <typename T, access::address_space Space>
459  store(multi_ptr<T, Space> dst, const T &x) const {
460 #ifdef __SYCL_DEVICE_ONLY__
461 #ifdef __NVPTX__
462  dst.get()[get_local_id()[0]] = x;
463 #else
464  sycl::detail::sub_group::store(dst, x);
465 #endif // __NVPTX__
466 #else
467  (void)dst;
468  (void)x;
469  throw runtime_error("Sub-groups are not supported on host device.",
471 #endif
472  }
473 
474  template <typename T, access::address_space Space>
477  store(multi_ptr<T, Space> dst, const T &x) const {
478 #ifdef __SYCL_DEVICE_ONLY__
479  dst.get()[get_local_id()[0]] = x;
480 #else
481  (void)dst;
482  (void)x;
483  throw runtime_error("Sub-groups are not supported on host device.",
485 #endif
486  }
487 
488 #ifdef __SYCL_DEVICE_ONLY__
489 #ifdef __NVPTX__
490  template <int N, typename T, access::address_space Space>
493  store(multi_ptr<T, Space> dst, const vec<T, N> &x) const {
494  for (int i = 0; i < N; ++i) {
495  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
496  }
497  }
498 #else // __NVPTX__
499  template <int N, typename T, access::address_space Space>
502  N != 1 && N != 3 && N != 16>
503  store(multi_ptr<T, Space> dst, const vec<T, N> &x) const {
504  sycl::detail::sub_group::store(dst, x);
505  }
506 
507  template <int N, typename T, access::address_space Space>
510  N == 1>
511  store(multi_ptr<T, Space> dst, const vec<T, 1> &x) const {
512  sycl::detail::sub_group::store(dst, x);
513  }
514 
515  template <int N, typename T, access::address_space Space>
517  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
518  N == 3>
519  store(multi_ptr<T, Space> dst, const vec<T, 3> &x) const {
520  store<1, T, Space>(dst, x.s0());
521  store<2, T, Space>(dst + get_max_local_range()[0], {x.s1(), x.s2()});
522  }
523 
524  template <int N, typename T, access::address_space Space>
526  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
527  N == 16>
528  store(multi_ptr<T, Space> dst, const vec<T, 16> &x) const {
529  store<8, T, Space>(dst, x.lo());
530  store<8, T, Space>(dst + 8 * get_max_local_range()[0], x.hi());
531  }
532 
533 #endif // __NVPTX__
534 #else // __SYCL_DEVICE_ONLY__
535  template <int N, typename T, access::address_space Space>
537  sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
538  store(multi_ptr<T, Space> dst, const vec<T, N> &x) const {
539  (void)dst;
540  (void)x;
541  throw runtime_error("Sub-groups are not supported on host device.",
543  }
544 #endif // __SYCL_DEVICE_ONLY__
545 
546  template <int N, typename T, access::address_space Space>
549  store(multi_ptr<T, Space> dst, const vec<T, N> &x) const {
550 #ifdef __SYCL_DEVICE_ONLY__
551  for (int i = 0; i < N; ++i) {
552  *(dst.get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
553  }
554 #else
555  (void)dst;
556  (void)x;
557  throw runtime_error("Sub-groups are not supported on host device.",
559 #endif
560  }
561 
562  /* --- synchronization functions --- */
563  void barrier() const {
564 #ifdef __SYCL_DEVICE_ONLY__
571 #else
572  throw runtime_error("Sub-groups are not supported on host device.",
574 #endif
575  }
576 
577  __SYCL_DEPRECATED("Sub-group barrier accepting fence_space is deprecated."
578  "Use barrier() without a fence_space instead.")
579  void barrier(access::fence_space accessSpace) const {
580 #ifdef __SYCL_DEVICE_ONLY__
581  int32_t flags = sycl::detail::getSPIRVMemorySemanticsMask(accessSpace);
583  flags);
584 #else
585  (void)accessSpace;
586  throw runtime_error("Sub-groups are not supported on host device.",
588 #endif
589  }
590 
591  /* --- deprecated collective functions --- */
592  template <typename T>
593  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
594  "sycl::ext::oneapi::broadcast instead.")
595  EnableIfIsScalarArithmetic<T> broadcast(T x, id<1> local_id) const {
596 #ifdef __SYCL_DEVICE_ONLY__
597  return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
598 #else
599  (void)x;
600  (void)local_id;
601  throw runtime_error("Sub-groups are not supported on host device.",
603 #endif
604  }
605 
606  template <typename T, class BinaryOperation>
607  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
608  "sycl::ext::oneapi::reduce instead.")
609  EnableIfIsScalarArithmetic<T> reduce(T x, BinaryOperation op) const {
610 #ifdef __SYCL_DEVICE_ONLY__
611  return sycl::detail::calc<T, __spv::GroupOperation::Reduce,
613  typename sycl::detail::GroupOpTag<T>::type(), x, op);
614 #else
615  (void)x;
616  (void)op;
617  throw runtime_error("Sub-groups are not supported on host device.",
619 #endif
620  }
621 
622  template <typename T, class BinaryOperation>
623  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
624  "sycl::ext::oneapi::reduce instead.")
625  EnableIfIsScalarArithmetic<T> reduce(T x, T init, BinaryOperation op) const {
626 #ifdef __SYCL_DEVICE_ONLY__
627  return op(init, reduce(x, op));
628 #else
629  (void)x;
630  (void)init;
631  (void)op;
632  throw runtime_error("Sub-groups are not supported on host device.",
634 #endif
635  }
636 
637  template <typename T, class BinaryOperation>
638  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
639  "sycl::ext::oneapi::exclusive_scan instead.")
640  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, BinaryOperation op) const {
641 #ifdef __SYCL_DEVICE_ONLY__
642  return sycl::detail::calc<T, __spv::GroupOperation::ExclusiveScan,
644  typename sycl::detail::GroupOpTag<T>::type(), x, op);
645 #else
646  (void)x;
647  (void)op;
648  throw runtime_error("Sub-groups are not supported on host device.",
650 #endif
651  }
652 
653  template <typename T, class BinaryOperation>
654  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
655  "sycl::ext::oneapi::exclusive_scan instead.")
656  EnableIfIsScalarArithmetic<T> exclusive_scan(T x, T init,
657  BinaryOperation op) const {
658 #ifdef __SYCL_DEVICE_ONLY__
659  if (get_local_id().get(0) == 0) {
660  x = op(init, x);
661  }
662  T scan = exclusive_scan(x, op);
663  if (get_local_id().get(0) == 0) {
664  scan = init;
665  }
666  return scan;
667 #else
668  (void)x;
669  (void)init;
670  (void)op;
671  throw runtime_error("Sub-groups are not supported on host device.",
673 #endif
674  }
675 
676  template <typename T, class BinaryOperation>
677  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
678  "sycl::ext::oneapi::inclusive_scan instead.")
679  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op) const {
680 #ifdef __SYCL_DEVICE_ONLY__
681  return sycl::detail::calc<T, __spv::GroupOperation::InclusiveScan,
683  typename sycl::detail::GroupOpTag<T>::type(), x, op);
684 #else
685  (void)x;
686  (void)op;
687  throw runtime_error("Sub-groups are not supported on host device.",
689 #endif
690  }
691 
692  template <typename T, class BinaryOperation>
693  __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use "
694  "sycl::ext::oneapi::inclusive_scan instead.")
695  EnableIfIsScalarArithmetic<T> inclusive_scan(T x, BinaryOperation op,
696  T init) const {
697 #ifdef __SYCL_DEVICE_ONLY__
698  if (get_local_id().get(0) == 0) {
699  x = op(init, x);
700  }
701  return inclusive_scan(x, op);
702 #else
703  (void)x;
704  (void)op;
705  (void)init;
706  throw runtime_error("Sub-groups are not supported on host device.",
708 #endif
709  }
710 
711  linear_id_type get_group_linear_range() const {
712 #ifdef __SYCL_DEVICE_ONLY__
713  return static_cast<linear_id_type>(get_group_range()[0]);
714 #else
715  throw runtime_error("Sub-groups are not supported on host device.",
717 #endif
718  }
719 
720  linear_id_type get_local_linear_range() const {
721 #ifdef __SYCL_DEVICE_ONLY__
722  return static_cast<linear_id_type>(get_local_range()[0]);
723 #else
724  throw runtime_error("Sub-groups are not supported on host device.",
726 #endif
727  }
728 
729  bool leader() const {
730 #ifdef __SYCL_DEVICE_ONLY__
731  return get_local_linear_id() == 0;
732 #else
733  throw runtime_error("Sub-groups are not supported on host device.",
735 #endif
736  }
737 
738 protected:
739  template <int dimensions> friend class cl::sycl::nd_item;
740  friend sub_group this_sub_group();
742  sub_group() = default;
743 };
744 
746  "use sycl::ext::oneapi::experimental::this_sub_group() instead")
747 inline sub_group this_sub_group() {
748 #ifdef __SYCL_DEVICE_ONLY__
749  return sub_group();
750 #else
751  throw runtime_error("Sub-groups are not supported on host device.",
753 #endif
754 }
755 
756 } // namespace oneapi
757 } // namespace ext
758 
759 } // namespace sycl
760 } // __SYCL_INLINE_NAMESPACE(cl)
spirv_ops.hpp
__spirv_ControlBarrier
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
Definition: spirv_ops.cpp:26
__spv::MemorySemanticsMask::SubgroupMemory
@ SubgroupMemory
Definition: spirv_types.hpp:91
cl::sycl::ext::oneapi::leader
detail::enable_if_t< detail::is_generic_group< Group >::value, bool > leader(Group g)
Definition: group_algorithm.hpp:536
cl::sycl::detail::sub_group::SelectBlockT
select_cl_scalar_integral_unsigned_t< T > SelectBlockT
Definition: sub_group.hpp:37
cl::sycl::ext::oneapi::sub_group::load
sycl::detail::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space > src) const
Definition: sub_group.hpp:382
cl::__ESIMD_NS::atomic_op::load
@ load
T
cl::sycl::ext::oneapi::experimental::this_sub_group
sub_group this_sub_group()
Definition: sub_group.hpp:23
cl::sycl::detail::sub_group::AcceptableForGlobalLoadStore
bool_constant<!std::is_same< void, SelectBlockT< T > >::value &&Space==access::address_space::global_space > AcceptableForGlobalLoadStore
Definition: sub_group.hpp:42
type_traits.hpp
cl::sycl::multi_ptr::get
pointer_t get() const
Definition: multi_ptr.hpp:232
cl::sycl::ext::oneapi::sub_group::store
sycl::detail::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:538
cl::sycl::ext::oneapi::sub_group::get_group_id
id_type get_group_id() const
Definition: sub_group.hpp:155
cl::sycl::detail::sub_group::AcceptableForLocalLoadStore
bool_constant<!std::is_same< void, SelectBlockT< T > >::value &&Space==access::address_space::local_space > AcceptableForLocalLoadStore
Definition: sub_group.hpp:47
cl::sycl::id< 1 >
__SYCL_DEPRECATED
#define __SYCL_DEPRECATED(message)
Definition: defines_elementary.hpp:47
cl::sycl::memory_scope::sub_group
@ sub_group
cl::__ESIMD_NS::barrier
__ESIMD_API void barrier()
Generic work-group barrier.
Definition: memory.hpp:891
cl::sycl::detail::get_local_linear_range
size_t get_local_linear_range(Group g)
cl::sycl::detail::get_local_linear_id
Group::linear_id_type get_local_linear_id(Group g)
helpers.hpp
spirv_vars.hpp
sycl
Definition: invoke_simd.hpp:68
bool_constant
__spv::MemorySemanticsMask::AcquireRelease
@ AcquireRelease
Definition: spirv_types.hpp:88
cl::sycl::multi_ptr
Provides constructors for address space qualified and non address space qualified pointers to allow i...
Definition: atomic.hpp:32
__spv::MemorySemanticsMask::CrossWorkgroupMemory
@ CrossWorkgroupMemory
Definition: spirv_types.hpp:93
access.hpp
cl::sycl::ext::oneapi::sub_group::store
void store(T *dst, const T &x) const
Definition: sub_group.hpp:448
id.hpp
cl::sycl::range< 1 >
cl::sycl::ext::oneapi::sub_group::store
sycl::detail::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space > dst, const T &x) const
Definition: sub_group.hpp:477
cl::sycl::access::fence_space
fence_space
Definition: access.hpp:37
cl::sycl::detail::ConvertToOpenCLType_t
conditional_t< TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::type, conditional_t< TryToGetPointerT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetPointerVecT< SelectMatchingOpenCLType_t< T > >::type, SelectMatchingOpenCLType_t< T > >> ConvertToOpenCLType_t
Definition: generic_type_traits.hpp:472
cl::sycl::memory_scope
memory_scope
Definition: memory_enums.hpp:26
__spv::GroupOperation::ExclusiveScan
@ ExclusiveScan
cl::sycl::detail::getSPIRVMemorySemanticsMask
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
Definition: helpers.hpp:200
cl::sycl::ext::oneapi::sub_group::get_group_linear_id
linear_id_type get_group_linear_id() const
Definition: sub_group.hpp:164
cl::sycl::ext::oneapi::sub_group::shuffle_down
T shuffle_down(T x, uint32_t delta) const
Definition: sub_group.hpp:201
functional.hpp
__spv::Scope::Subgroup
@ Subgroup
Definition: spirv_types.hpp:31
cl::sycl::ext::oneapi::sub_group::shuffle_up
T shuffle_up(T x, uint32_t delta) const
Definition: sub_group.hpp:212
generic_type_traits.hpp
range.hpp
__spv::GroupOperation::InclusiveScan
@ InclusiveScan
cl::sycl::ext::oneapi::sub_group::shuffle
T shuffle(T x, id_type local_id) const
Definition: sub_group.hpp:190
cl::sycl::ext::oneapi::sub_group::load
sycl::detail::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space > cv_src) const
Definition: sub_group.hpp:394
spirv.hpp
defines.hpp
cl
We provide new interfaces for matrix muliply in this patch:
Definition: access.hpp:13
cl::sycl::ext::oneapi::sub_group::barrier
void barrier() const
Definition: sub_group.hpp:563
cl::sycl::ext::oneapi::sub_group::load
sycl::detail::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space > cv_src) const
Definition: sub_group.hpp:283
cl::sycl::access::address_space
address_space
Definition: access.hpp:45
cl::sycl::ext::oneapi::sub_group
Definition: sub_group.hpp:108
__spv::MemorySemanticsMask::WorkgroupMemory
@ WorkgroupMemory
Definition: spirv_types.hpp:92
std::get
constexpr tuple_element< I, tuple< Types... > >::type & get(cl::sycl::detail::tuple< Types... > &Arg) noexcept
Definition: tuple.hpp:199
cl::sycl::ext::oneapi::sub_group::store
sycl::detail::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space > dst, const vec< T, N > &x) const
Definition: sub_group.hpp:549
cl::sycl::vec
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
Definition: aliases.hpp:18
cl::sycl::nd_item
Identifies an instance of the function object executing at each point in an nd_range.
Definition: helpers.hpp:32
cl::sycl::ext::oneapi::sub_group::linear_id_type
uint32_t linear_id_type
Definition: sub_group.hpp:112
reduce
_Tp reduce(const simd< _Tp, _Abi > &, _BinaryOp=_BinaryOp())
cl::sycl::ext::oneapi::sub_group::get_local_linear_id
linear_id_type get_local_linear_id() const
Definition: sub_group.hpp:128
cl::sycl::ext::oneapi::sub_group::get_local_range
range_type get_local_range() const
Definition: sub_group.hpp:137
memory_enums.hpp
cl::sycl::ext::oneapi::sub_group::load
sycl::detail::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space > cv_src) const
Definition: sub_group.hpp:302
cl::sycl::ext::oneapi::sub_group::get_local_id
id_type get_local_id() const
Definition: sub_group.hpp:119
cl::sycl::ext::oneapi::sub_group::store
sycl::detail::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space > dst, const T &x) const
Definition: sub_group.hpp:459
cl::sycl::ext::oneapi::sub_group::load
T load(CVT *src) const
Definition: sub_group.hpp:272
types.hpp
cl::sycl::ext::oneapi::sub_group::shuffle_xor
T shuffle_xor(T x, id_type value) const
Definition: sub_group.hpp:223
cl::sycl::detail::enable_if_t
typename std::enable_if< B, T >::type enable_if_t
Definition: stl_type_traits.hpp:24
cl::sycl::ext::oneapi::sub_group::get_group_range
range_type get_group_range() const
Definition: sub_group.hpp:173
__spv::GroupOperation::Reduce
@ Reduce
cl::__ESIMD_NS::atomic_op::store
@ store
cl::sycl::detail::select_cl_scalar_integral_unsigned_t
select_apply_cl_scalar_t< T, sycl::cl_uchar, sycl::cl_ushort, sycl::cl_uint, sycl::cl_ulong > select_cl_scalar_integral_unsigned_t
Definition: generic_type_traits.hpp:376
PI_INVALID_DEVICE
@ PI_INVALID_DEVICE
Definition: pi.h:94
cl::sycl::ext::oneapi::EnableIfIsScalarArithmetic
cl::sycl::detail::enable_if_t< cl::sycl::detail::is_scalar_arithmetic< T >::value, T > EnableIfIsScalarArithmetic
Definition: group_algorithm.hpp:30
cl::sycl::ext::oneapi::sub_group::get_max_local_range
range_type get_max_local_range() const
Definition: sub_group.hpp:146
cl::sycl::detail::remove_AS::type
T type
Definition: access.hpp:204
__SYCL_INLINE_NAMESPACE
#define __SYCL_INLINE_NAMESPACE(X)
Definition: defines_elementary.hpp:12