25 #include <type_traits>
29 template <
typename T, access::address_space Space>
class multi_ptr;
39 template <
typename T, access::address_space Space>
42 Space == access::address_space::global_space>;
44 template <
typename T, access::address_space Space>
47 Space == access::address_space::local_space>;
49 #ifdef __SYCL_DEVICE_ONLY__
50 template <
typename T, access::address_space Space>
57 __spirv_SubgroupBlockReadINTEL<BlockT>(
reinterpret_cast<PtrT
>(src.
get()));
59 return sycl::bit_cast<T>(Ret);
62 template <
int N,
typename T, access::address_space Space>
64 using BlockT = SelectBlockT<T>;
70 __spirv_SubgroupBlockReadINTEL<VecT>(
reinterpret_cast<PtrT
>(src.
get()));
72 return sycl::bit_cast<typename vec<T, N>::vector_t>(Ret);
75 template <
typename T, access::address_space Space>
77 using BlockT = SelectBlockT<T>;
80 __spirv_SubgroupBlockWriteINTEL(
reinterpret_cast<PtrT
>(dst.
get()),
81 sycl::bit_cast<BlockT>(x));
84 template <
int N,
typename T, access::address_space Space>
85 void store(multi_ptr<T, Space> dst,
const vec<T, N> &x) {
86 using BlockT = SelectBlockT<T>;
91 __spirv_SubgroupBlockWriteINTEL(
reinterpret_cast<PtrT
>(dst.get()),
92 sycl::bit_cast<VecT>(x));
94 #endif // __SYCL_DEVICE_ONLY__
104 namespace experimental {
113 static constexpr
int dimensions = 1;
115 sycl::memory_scope::sub_group;
120 #ifdef __SYCL_DEVICE_ONLY__
121 return __spirv_SubgroupLocalInvocationId();
123 throw runtime_error(
"Sub-groups are not supported on host device.",
129 #ifdef __SYCL_DEVICE_ONLY__
132 throw runtime_error(
"Sub-groups are not supported on host device.",
138 #ifdef __SYCL_DEVICE_ONLY__
139 return __spirv_SubgroupSize();
141 throw runtime_error(
"Sub-groups are not supported on host device.",
147 #ifdef __SYCL_DEVICE_ONLY__
148 return __spirv_SubgroupMaxSize();
150 throw runtime_error(
"Sub-groups are not supported on host device.",
156 #ifdef __SYCL_DEVICE_ONLY__
157 return __spirv_SubgroupId();
159 throw runtime_error(
"Sub-groups are not supported on host device.",
165 #ifdef __SYCL_DEVICE_ONLY__
168 throw runtime_error(
"Sub-groups are not supported on host device.",
174 #ifdef __SYCL_DEVICE_ONLY__
175 return __spirv_NumSubgroups();
177 throw runtime_error(
"Sub-groups are not supported on host device.",
182 template <
typename T>
191 #ifdef __SYCL_DEVICE_ONLY__
192 return sycl::detail::spirv::SubgroupShuffle(x, local_id);
196 throw runtime_error(
"Sub-groups are not supported on host device.",
202 #ifdef __SYCL_DEVICE_ONLY__
203 return sycl::detail::spirv::SubgroupShuffleDown(x, delta);
207 throw runtime_error(
"Sub-groups are not supported on host device.",
212 template <
typename T> T
shuffle_up(T x, uint32_t delta)
const {
213 #ifdef __SYCL_DEVICE_ONLY__
214 return sycl::detail::spirv::SubgroupShuffleUp(x, delta);
218 throw runtime_error(
"Sub-groups are not supported on host device.",
224 #ifdef __SYCL_DEVICE_ONLY__
225 return sycl::detail::spirv::SubgroupShuffleXor(x, value);
229 throw runtime_error(
"Sub-groups are not supported on host device.",
236 #ifdef __SYCL_DEVICE_ONLY__
238 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
240 !std::is_same<typename detail::remove_AS<T>::type,
T>::value,
T>
241 load(CVT *cv_src)
const {
242 T *src =
const_cast<T *
>(cv_src);
244 sycl::detail::deduce_AS<T>::value>(
249 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
251 std::is_same<typename detail::remove_AS<T>::type, T>::value, T>
252 load(CVT *cv_src)
const {
253 T *src =
const_cast<T *
>(cv_src);
256 return src[get_local_id()[0]];
258 auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(src);
262 auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(src);
266 assert(!
"Sub-group load() is supported for local or global pointers only.");
270 #else //__SYCL_DEVICE_ONLY__
271 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
274 throw runtime_error(
"Sub-groups are not supported on host device.",
277 #endif //__SYCL_DEVICE_ONLY__
280 typename T = std::remove_cv_t<CVT>>
285 #ifdef __SYCL_DEVICE_ONLY__
287 return src.
get()[get_local_id()[0]];
289 return sycl::detail::sub_group::load(src);
293 throw runtime_error(
"Sub-groups are not supported on host device.",
299 typename T = std::remove_cv_t<CVT>>
304 #ifdef __SYCL_DEVICE_ONLY__
305 return src.
get()[get_local_id()[0]];
308 throw runtime_error(
"Sub-groups are not supported on host device.",
312 #ifdef __SYCL_DEVICE_ONLY__
315 typename T = std::remove_cv_t<CVT>>
322 for (
int i = 0; i < N; ++i) {
323 res[i] = *(src.
get() + i * get_max_local_range()[0] + get_local_id()[0]);
329 typename T = std::remove_cv_t<CVT>>
332 N != 1 && N != 3 && N != 16,
336 return sycl::detail::sub_group::load<N, T>(src);
340 typename T = std::remove_cv_t<CVT>>
345 load(
const multi_ptr<CVT, Space> cv_src)
const {
346 multi_ptr<T, Space> src =
const_cast<T *
>(
static_cast<CVT *
>(cv_src));
347 return {sycl::detail::sub_group::load<8, T>(src),
348 sycl::detail::sub_group::load<8, T>(src +
349 8 * get_max_local_range()[0])};
353 typename T = std::remove_cv_t<CVT>>
355 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
358 load(
const multi_ptr<CVT, Space> cv_src)
const {
359 multi_ptr<T, Space> src =
const_cast<T *
>(
static_cast<CVT *
>(cv_src));
361 sycl::detail::sub_group::load<1, T>(src),
362 sycl::detail::sub_group::load<2, T>(src + get_max_local_range()[0])};
366 typename T = std::remove_cv_t<CVT>>
368 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
371 load(
const multi_ptr<CVT, Space> cv_src)
const {
372 multi_ptr<T, Space> src =
const_cast<T *
>(
static_cast<CVT *
>(cv_src));
373 return sycl::detail::sub_group::load(src);
375 #endif // ___NVPTX___
376 #else // __SYCL_DEVICE_ONLY__
378 typename T = std::remove_cv_t<CVT>>
380 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
384 throw runtime_error(
"Sub-groups are not supported on host device.",
387 #endif // __SYCL_DEVICE_ONLY__
390 typename T = std::remove_cv_t<CVT>>
396 #ifdef __SYCL_DEVICE_ONLY__
398 for (
int i = 0; i < N; ++i) {
399 res[i] = *(src.
get() + i * get_max_local_range()[0] + get_local_id()[0]);
404 throw runtime_error(
"Sub-groups are not supported on host device.",
409 #ifdef __SYCL_DEVICE_ONLY__
411 template <
typename T>
413 !std::is_same<typename detail::remove_AS<T>::type,
T>::value>
416 sycl::detail::deduce_AS<T>::value>(
422 template <
typename T>
424 std::is_same<typename detail::remove_AS<T>::type, T>::value>
428 dst[get_local_id()[0]] = x;
430 auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(dst);
436 auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(dst);
443 !
"Sub-group store() is supported for local or global pointers only.");
447 #else //__SYCL_DEVICE_ONLY__
448 template <
typename T>
void store(T *dst,
const T &x)
const {
451 throw runtime_error(
"Sub-groups are not supported on host device.",
454 #endif //__SYCL_DEVICE_ONLY__
456 template <
typename T, access::address_space Space>
460 #ifdef __SYCL_DEVICE_ONLY__
462 dst.
get()[get_local_id()[0]] = x;
464 sycl::detail::sub_group::store(dst, x);
469 throw runtime_error(
"Sub-groups are not supported on host device.",
474 template <
typename T, access::address_space Space>
478 #ifdef __SYCL_DEVICE_ONLY__
479 dst.
get()[get_local_id()[0]] = x;
483 throw runtime_error(
"Sub-groups are not supported on host device.",
488 #ifdef __SYCL_DEVICE_ONLY__
490 template <
int N,
typename T, access::address_space Space>
494 for (
int i = 0; i < N; ++i) {
495 *(dst.
get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
499 template <
int N,
typename T, access::address_space Space>
502 N != 1 && N != 3 && N != 16>
504 sycl::detail::sub_group::store(dst, x);
507 template <
int N,
typename T, access::address_space Space>
511 store(multi_ptr<T, Space> dst,
const vec<T, 1> &x)
const {
512 sycl::detail::sub_group::store(dst, x);
515 template <
int N,
typename T, access::address_space Space>
517 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
519 store(multi_ptr<T, Space> dst,
const vec<T, 3> &x)
const {
520 store<1, T, Space>(dst, x.s0());
521 store<2, T, Space>(dst + get_max_local_range()[0], {x.s1(), x.s2()});
524 template <
int N,
typename T, access::address_space Space>
526 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
528 store(multi_ptr<T, Space> dst,
const vec<T, 16> &x)
const {
529 store<8, T, Space>(dst, x.lo());
530 store<8, T, Space>(dst + 8 * get_max_local_range()[0], x.hi());
534 #else // __SYCL_DEVICE_ONLY__
535 template <
int N,
typename T, access::address_space Space>
537 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
541 throw runtime_error(
"Sub-groups are not supported on host device.",
544 #endif // __SYCL_DEVICE_ONLY__
546 template <
int N,
typename T, access::address_space Space>
550 #ifdef __SYCL_DEVICE_ONLY__
551 for (
int i = 0; i < N; ++i) {
552 *(dst.
get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
557 throw runtime_error(
"Sub-groups are not supported on host device.",
564 #ifdef __SYCL_DEVICE_ONLY__
572 throw runtime_error(
"Sub-groups are not supported on host device.",
578 "Use barrier() without a fence_space instead.")
580 #ifdef __SYCL_DEVICE_ONLY__
586 throw runtime_error(
"Sub-groups are not supported on host device.",
592 template <
typename T>
594 "sycl::ext::oneapi::broadcast instead.")
596 #ifdef __SYCL_DEVICE_ONLY__
597 return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
601 throw runtime_error(
"Sub-groups are not supported on host device.",
606 template <
typename T,
class BinaryOperation>
608 "sycl::ext::oneapi::reduce instead.")
610 #ifdef __SYCL_DEVICE_ONLY__
613 typename sycl::detail::GroupOpTag<T>::type(), x, op);
617 throw runtime_error(
"Sub-groups are not supported on host device.",
622 template <
typename T,
class BinaryOperation>
624 "sycl::ext::oneapi::reduce instead.")
626 #ifdef __SYCL_DEVICE_ONLY__
627 return op(init,
reduce(x, op));
632 throw runtime_error(
"Sub-groups are not supported on host device.",
637 template <
typename T,
class BinaryOperation>
639 "sycl::ext::oneapi::exclusive_scan instead.")
641 #ifdef __SYCL_DEVICE_ONLY__
644 typename sycl::detail::GroupOpTag<T>::type(), x, op);
648 throw runtime_error(
"Sub-groups are not supported on host device.",
653 template <
typename T,
class BinaryOperation>
655 "sycl::ext::oneapi::exclusive_scan instead.")
657 BinaryOperation op)
const {
658 #ifdef __SYCL_DEVICE_ONLY__
659 if (get_local_id().
get(0) == 0) {
662 T scan = exclusive_scan(x, op);
663 if (get_local_id().
get(0) == 0) {
671 throw runtime_error(
"Sub-groups are not supported on host device.",
676 template <
typename T,
class BinaryOperation>
678 "sycl::ext::oneapi::inclusive_scan instead.")
680 #ifdef __SYCL_DEVICE_ONLY__
683 typename sycl::detail::GroupOpTag<T>::type(), x, op);
687 throw runtime_error(
"Sub-groups are not supported on host device.",
692 template <
typename T,
class BinaryOperation>
694 "sycl::ext::oneapi::inclusive_scan instead.")
697 #ifdef __SYCL_DEVICE_ONLY__
698 if (get_local_id().
get(0) == 0) {
701 return inclusive_scan(x, op);
706 throw runtime_error(
"Sub-groups are not supported on host device.",
711 linear_id_type get_group_linear_range()
const {
712 #ifdef __SYCL_DEVICE_ONLY__
713 return static_cast<linear_id_type
>(get_group_range()[0]);
715 throw runtime_error(
"Sub-groups are not supported on host device.",
721 #ifdef __SYCL_DEVICE_ONLY__
722 return static_cast<linear_id_type
>(get_local_range()[0]);
724 throw runtime_error(
"Sub-groups are not supported on host device.",
730 #ifdef __SYCL_DEVICE_ONLY__
733 throw runtime_error(
"Sub-groups are not supported on host device.",
746 "use sycl::ext::oneapi::experimental::this_sub_group() instead")
748 #ifdef __SYCL_DEVICE_ONLY__
751 throw runtime_error(
"Sub-groups are not supported on host device.",