25 #include <type_traits>
41 template <
typename T, access::address_space Space>
43 std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
44 Space == access::address_space::global_space>;
46 template <
typename T, access::address_space Space>
48 std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
49 Space == access::address_space::local_space>;
51 #ifdef __SYCL_DEVICE_ONLY__
60 __spirv_SubgroupBlockReadINTEL<BlockT>(
reinterpret_cast<PtrT
>(src.
get()));
62 return sycl::bit_cast<T>(Ret);
68 using BlockT = SelectBlockT<T>;
69 using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
74 __spirv_SubgroupBlockReadINTEL<VecT>(
reinterpret_cast<PtrT
>(src.
get()));
76 return sycl::bit_cast<typename vec<T, N>::vector_t>(Ret);
82 using BlockT = SelectBlockT<T>;
86 __spirv_SubgroupBlockWriteINTEL(
reinterpret_cast<PtrT
>(dst.
get()),
87 sycl::bit_cast<BlockT>(x));
92 void store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, N> &x) {
93 using BlockT = SelectBlockT<T>;
94 using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
96 const multi_ptr<BlockT, Space, DecorateAddress>>;
98 __spirv_SubgroupBlockWriteINTEL(
reinterpret_cast<PtrT
>(dst.get()),
99 sycl::bit_cast<VecT>(x));
101 #endif // __SYCL_DEVICE_ONLY__
109 inline multi_ptr<T, Space, IsDecorated>
111 if constexpr (IsDecorated == access::decorated::legacy) {
124 namespace ext::oneapi {
127 namespace experimental {
136 static constexpr
int dimensions = 1;
138 sycl::memory_scope::sub_group;
143 #ifdef __SYCL_DEVICE_ONLY__
144 return __spirv_SubgroupLocalInvocationId();
146 throw runtime_error(
"Sub-groups are not supported on host device.",
147 PI_ERROR_INVALID_DEVICE);
152 #ifdef __SYCL_DEVICE_ONLY__
155 throw runtime_error(
"Sub-groups are not supported on host device.",
156 PI_ERROR_INVALID_DEVICE);
161 #ifdef __SYCL_DEVICE_ONLY__
162 return __spirv_SubgroupSize();
164 throw runtime_error(
"Sub-groups are not supported on host device.",
165 PI_ERROR_INVALID_DEVICE);
170 #ifdef __SYCL_DEVICE_ONLY__
171 return __spirv_SubgroupMaxSize();
173 throw runtime_error(
"Sub-groups are not supported on host device.",
174 PI_ERROR_INVALID_DEVICE);
179 #ifdef __SYCL_DEVICE_ONLY__
180 return __spirv_SubgroupId();
182 throw runtime_error(
"Sub-groups are not supported on host device.",
183 PI_ERROR_INVALID_DEVICE);
188 #ifdef __SYCL_DEVICE_ONLY__
191 throw runtime_error(
"Sub-groups are not supported on host device.",
192 PI_ERROR_INVALID_DEVICE);
197 #ifdef __SYCL_DEVICE_ONLY__
198 return __spirv_NumSubgroups();
200 throw runtime_error(
"Sub-groups are not supported on host device.",
201 PI_ERROR_INVALID_DEVICE);
205 template <
typename T>
206 using EnableIfIsScalarArithmetic =
207 std::enable_if_t<sycl::detail::is_scalar_arithmetic<T>::value, T>;
213 #ifdef __SYCL_DEVICE_ONLY__
214 return sycl::detail::spirv::SubgroupShuffle(x, local_id);
218 throw runtime_error(
"Sub-groups are not supported on host device.",
219 PI_ERROR_INVALID_DEVICE);
224 #ifdef __SYCL_DEVICE_ONLY__
225 return sycl::detail::spirv::SubgroupShuffleDown(x, delta);
229 throw runtime_error(
"Sub-groups are not supported on host device.",
230 PI_ERROR_INVALID_DEVICE);
234 template <
typename T> T
shuffle_up(T x, uint32_t delta)
const {
235 #ifdef __SYCL_DEVICE_ONLY__
236 return sycl::detail::spirv::SubgroupShuffleUp(x, delta);
240 throw runtime_error(
"Sub-groups are not supported on host device.",
241 PI_ERROR_INVALID_DEVICE);
246 #ifdef __SYCL_DEVICE_ONLY__
247 return sycl::detail::spirv::SubgroupShuffleXor(x, value);
251 throw runtime_error(
"Sub-groups are not supported on host device.",
252 PI_ERROR_INVALID_DEVICE);
258 #ifdef __SYCL_DEVICE_ONLY__
260 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
261 std::enable_if_t<!std::is_same<remove_decoration_t<T>,
T>::value,
T>
262 load(CVT *cv_src)
const {
263 T *src =
const_cast<T *
>(cv_src);
265 sycl::detail::deduce_AS<T>::value,
266 sycl::access::decorated::yes>(src));
270 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
271 std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value, T>
272 load(CVT *cv_src)
const {
273 T *src =
const_cast<T *
>(cv_src);
276 return src[get_local_id()[0]];
278 auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(src);
282 auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(src);
286 assert(!
"Sub-group load() is supported for local or global pointers only.");
290 #else //__SYCL_DEVICE_ONLY__
291 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
294 throw runtime_error(
"Sub-groups are not supported on host device.",
295 PI_ERROR_INVALID_DEVICE);
297 #endif //__SYCL_DEVICE_ONLY__
302 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
T>
306 #ifdef __SYCL_DEVICE_ONLY__
308 return src.
get()[get_local_id()[0]];
310 return sycl::detail::sub_group::load(src);
314 throw runtime_error(
"Sub-groups are not supported on host device.",
315 PI_ERROR_INVALID_DEVICE);
322 sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value,
T>
326 #ifdef __SYCL_DEVICE_ONLY__
327 return src.
get()[get_local_id()[0]];
330 throw runtime_error(
"Sub-groups are not supported on host device.",
331 PI_ERROR_INVALID_DEVICE);
334 #ifdef __SYCL_DEVICE_ONLY__
339 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
345 for (
int i = 0; i < N; ++i) {
346 res[i] = *(src.
get() + i * get_max_local_range()[0] + get_local_id()[0]);
354 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
355 N != 1 && N != 3 && N != 16,
360 return sycl::detail::sub_group::load<N, T>(src);
366 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
369 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
370 multi_ptr<T, Space, IsDecorated> src =
372 return {sycl::detail::sub_group::load<8, T>(src),
373 sycl::detail::sub_group::load<8, T>(src +
374 8 * get_max_local_range()[0])};
380 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
383 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
384 multi_ptr<T, Space, IsDecorated> src =
387 sycl::detail::sub_group::load<1, T>(src),
388 sycl::detail::sub_group::load<2, T>(src + get_max_local_range()[0])};
394 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
397 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
398 multi_ptr<T, Space, IsDecorated> src =
400 return sycl::detail::sub_group::load(src);
402 #endif // ___NVPTX___
403 #else // __SYCL_DEVICE_ONLY__
407 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
411 throw runtime_error(
"Sub-groups are not supported on host device.",
412 PI_ERROR_INVALID_DEVICE);
414 #endif // __SYCL_DEVICE_ONLY__
419 sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value,
424 #ifdef __SYCL_DEVICE_ONLY__
426 for (
int i = 0; i < N; ++i) {
427 res[i] = *(src.
get() + i * get_max_local_range()[0] + get_local_id()[0]);
432 throw runtime_error(
"Sub-groups are not supported on host device.",
433 PI_ERROR_INVALID_DEVICE);
437 #ifdef __SYCL_DEVICE_ONLY__
439 template <
typename T>
440 std::enable_if_t<!std::is_same<remove_decoration_t<T>,
T>::value>
443 sycl::detail::deduce_AS<T>::value,
444 sycl::access::decorated::yes>(dst),
449 template <
typename T>
450 std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value>
454 dst[get_local_id()[0]] = x;
456 auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(dst);
462 auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(dst);
469 !
"Sub-group store() is supported for local or global pointers only.");
473 #else //__SYCL_DEVICE_ONLY__
474 template <
typename T>
void store(T *dst,
const T &x)
const {
477 throw runtime_error(
"Sub-groups are not supported on host device.",
478 PI_ERROR_INVALID_DEVICE);
480 #endif //__SYCL_DEVICE_ONLY__
485 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
487 #ifdef __SYCL_DEVICE_ONLY__
489 dst.
get()[get_local_id()[0]] = x;
491 sycl::detail::sub_group::store(dst, x);
496 throw runtime_error(
"Sub-groups are not supported on host device.",
497 PI_ERROR_INVALID_DEVICE);
504 sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value>
506 #ifdef __SYCL_DEVICE_ONLY__
507 dst.
get()[get_local_id()[0]] = x;
511 throw runtime_error(
"Sub-groups are not supported on host device.",
512 PI_ERROR_INVALID_DEVICE);
516 #ifdef __SYCL_DEVICE_ONLY__
521 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
523 for (
int i = 0; i < N; ++i) {
524 *(dst.
get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
531 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
532 N != 1 && N != 3 && N != 16>
534 sycl::detail::sub_group::store(dst, x);
540 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
542 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, 1> &x)
const {
543 sycl::detail::sub_group::store(dst, x);
549 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
551 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, 3> &x)
const {
552 store<1, T, Space, DecorateAddress>(dst, x.s0());
553 store<2, T, Space, DecorateAddress>(dst + get_max_local_range()[0],
560 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
562 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, 16> &x)
const {
563 store<8, T, Space, DecorateAddress>(dst,
x.lo());
564 store<8, T, Space, DecorateAddress>(dst + 8 * get_max_local_range()[0],
569 #else // __SYCL_DEVICE_ONLY__
573 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
577 throw runtime_error(
"Sub-groups are not supported on host device.",
578 PI_ERROR_INVALID_DEVICE);
580 #endif // __SYCL_DEVICE_ONLY__
585 sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value>
587 #ifdef __SYCL_DEVICE_ONLY__
588 for (
int i = 0; i < N; ++i) {
589 *(dst.
get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
594 throw runtime_error(
"Sub-groups are not supported on host device.",
595 PI_ERROR_INVALID_DEVICE);
601 #ifdef __SYCL_DEVICE_ONLY__
609 throw runtime_error(
"Sub-groups are not supported on host device.",
610 PI_ERROR_INVALID_DEVICE);
615 "Use barrier() without a fence_space instead.")
617 #ifdef __SYCL_DEVICE_ONLY__
623 throw runtime_error(
"Sub-groups are not supported on host device.",
624 PI_ERROR_INVALID_DEVICE);
629 template <
typename T>
631 "sycl::ext::oneapi::broadcast instead.")
632 EnableIfIsScalarArithmetic<T> broadcast(T x,
id<1> local_id)
const {
633 #ifdef __SYCL_DEVICE_ONLY__
634 return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
638 throw runtime_error(
"Sub-groups are not supported on host device.",
639 PI_ERROR_INVALID_DEVICE);
643 template <
typename T,
class BinaryOperation>
645 "sycl::ext::oneapi::reduce instead.")
646 EnableIfIsScalarArithmetic<
T>
reduce(
T x, BinaryOperation op)
const {
647 #ifdef __SYCL_DEVICE_ONLY__
648 return sycl::detail::calc<__spv::GroupOperation::Reduce>(
649 typename sycl::detail::GroupOpTag<T>::type(), *
this, x, op);
653 throw runtime_error(
"Sub-groups are not supported on host device.",
654 PI_ERROR_INVALID_DEVICE);
658 template <
typename T,
class BinaryOperation>
660 "sycl::ext::oneapi::reduce instead.")
661 EnableIfIsScalarArithmetic<T>
reduce(T x, T init, BinaryOperation op)
const {
662 #ifdef __SYCL_DEVICE_ONLY__
663 return op(init,
reduce(x, op));
668 throw runtime_error(
"Sub-groups are not supported on host device.",
669 PI_ERROR_INVALID_DEVICE);
673 template <
typename T,
class BinaryOperation>
675 "sycl::ext::oneapi::exclusive_scan instead.")
676 EnableIfIsScalarArithmetic<
T> exclusive_scan(
T x, BinaryOperation op)
const {
677 #ifdef __SYCL_DEVICE_ONLY__
678 return sycl::detail::calc<__spv::GroupOperation::ExclusiveScan>(
679 typename sycl::detail::GroupOpTag<T>::type(), *
this, x, op);
683 throw runtime_error(
"Sub-groups are not supported on host device.",
684 PI_ERROR_INVALID_DEVICE);
688 template <
typename T,
class BinaryOperation>
690 "sycl::ext::oneapi::exclusive_scan instead.")
691 EnableIfIsScalarArithmetic<
T> exclusive_scan(
T x,
T init,
692 BinaryOperation op)
const {
693 #ifdef __SYCL_DEVICE_ONLY__
694 if (get_local_id().
get(0) == 0) {
697 T scan = exclusive_scan(x, op);
698 if (get_local_id().
get(0) == 0) {
706 throw runtime_error(
"Sub-groups are not supported on host device.",
707 PI_ERROR_INVALID_DEVICE);
711 template <
typename T,
class BinaryOperation>
713 "sycl::ext::oneapi::inclusive_scan instead.")
714 EnableIfIsScalarArithmetic<
T> inclusive_scan(
T x, BinaryOperation op)
const {
715 #ifdef __SYCL_DEVICE_ONLY__
716 return sycl::detail::calc<__spv::GroupOperation::InclusiveScan>(
717 typename sycl::detail::GroupOpTag<T>::type(), *
this, x, op);
721 throw runtime_error(
"Sub-groups are not supported on host device.",
722 PI_ERROR_INVALID_DEVICE);
726 template <
typename T,
class BinaryOperation>
728 "sycl::ext::oneapi::inclusive_scan instead.")
729 EnableIfIsScalarArithmetic<
T> inclusive_scan(
T x, BinaryOperation op,
731 #ifdef __SYCL_DEVICE_ONLY__
732 if (get_local_id().
get(0) == 0) {
735 return inclusive_scan(x, op);
740 throw runtime_error(
"Sub-groups are not supported on host device.",
741 PI_ERROR_INVALID_DEVICE);
745 linear_id_type get_group_linear_range()
const {
746 #ifdef __SYCL_DEVICE_ONLY__
747 return static_cast<linear_id_type
>(get_group_range()[0]);
749 throw runtime_error(
"Sub-groups are not supported on host device.",
750 PI_ERROR_INVALID_DEVICE);
755 #ifdef __SYCL_DEVICE_ONLY__
756 return static_cast<linear_id_type
>(get_local_range()[0]);
758 throw runtime_error(
"Sub-groups are not supported on host device.",
759 PI_ERROR_INVALID_DEVICE);
763 bool leader()
const {
764 #ifdef __SYCL_DEVICE_ONLY__
767 throw runtime_error(
"Sub-groups are not supported on host device.",
768 PI_ERROR_INVALID_DEVICE);
773 template <
int dimensions>
friend class sycl::nd_item;
780 "use sycl::ext::oneapi::experimental::this_sub_group() instead")
782 #ifdef __SYCL_DEVICE_ONLY__
785 throw runtime_error(
"Sub-groups are not supported on host device.",
786 PI_ERROR_INVALID_DEVICE);