25 #include <type_traits>
41 template <
typename T, access::address_space Space>
44 Space == access::address_space::global_space>;
46 template <
typename T, access::address_space Space>
49 Space == access::address_space::local_space>;
51 #ifdef __SYCL_DEVICE_ONLY__
60 __spirv_SubgroupBlockReadINTEL<BlockT>(
reinterpret_cast<PtrT
>(src.
get()));
62 return sycl::bit_cast<T>(Ret);
68 using BlockT = SelectBlockT<T>;
69 using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
74 __spirv_SubgroupBlockReadINTEL<VecT>(
reinterpret_cast<PtrT
>(src.
get()));
76 return sycl::bit_cast<typename vec<T, N>::vector_t>(Ret);
82 using BlockT = SelectBlockT<T>;
86 __spirv_SubgroupBlockWriteINTEL(
reinterpret_cast<PtrT
>(dst.
get()),
87 sycl::bit_cast<BlockT>(x));
92 void store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, N> &x) {
93 using BlockT = SelectBlockT<T>;
94 using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
96 const multi_ptr<BlockT, Space, DecorateAddress>>;
98 __spirv_SubgroupBlockWriteINTEL(
reinterpret_cast<PtrT
>(dst.get()),
99 sycl::bit_cast<VecT>(x));
109 inline multi_ptr<T, Space, IsDecorated>
111 if constexpr (IsDecorated == access::decorated::legacy) {
124 namespace ext::oneapi {
127 namespace experimental {
136 static constexpr
int dimensions = 1;
138 sycl::memory_scope::sub_group;
143 #ifdef __SYCL_DEVICE_ONLY__
144 return __spirv_SubgroupLocalInvocationId();
146 throw runtime_error(
"Sub-groups are not supported on host device.",
147 PI_ERROR_INVALID_DEVICE);
152 #ifdef __SYCL_DEVICE_ONLY__
155 throw runtime_error(
"Sub-groups are not supported on host device.",
156 PI_ERROR_INVALID_DEVICE);
161 #ifdef __SYCL_DEVICE_ONLY__
162 return __spirv_SubgroupSize();
164 throw runtime_error(
"Sub-groups are not supported on host device.",
165 PI_ERROR_INVALID_DEVICE);
170 #ifdef __SYCL_DEVICE_ONLY__
171 return __spirv_SubgroupMaxSize();
173 throw runtime_error(
"Sub-groups are not supported on host device.",
174 PI_ERROR_INVALID_DEVICE);
179 #ifdef __SYCL_DEVICE_ONLY__
180 return __spirv_SubgroupId();
182 throw runtime_error(
"Sub-groups are not supported on host device.",
183 PI_ERROR_INVALID_DEVICE);
188 #ifdef __SYCL_DEVICE_ONLY__
191 throw runtime_error(
"Sub-groups are not supported on host device.",
192 PI_ERROR_INVALID_DEVICE);
197 #ifdef __SYCL_DEVICE_ONLY__
198 return __spirv_NumSubgroups();
200 throw runtime_error(
"Sub-groups are not supported on host device.",
201 PI_ERROR_INVALID_DEVICE);
205 template <
typename T>
207 std::enable_if_t<sycl::detail::is_scalar_arithmetic<T>::value, T>;
213 #ifdef __SYCL_DEVICE_ONLY__
214 return sycl::detail::spirv::SubgroupShuffle(x, local_id);
218 throw runtime_error(
"Sub-groups are not supported on host device.",
219 PI_ERROR_INVALID_DEVICE);
224 #ifdef __SYCL_DEVICE_ONLY__
225 return sycl::detail::spirv::SubgroupShuffleDown(x, delta);
229 throw runtime_error(
"Sub-groups are not supported on host device.",
230 PI_ERROR_INVALID_DEVICE);
234 template <
typename T> T
shuffle_up(T x, uint32_t delta)
const {
235 #ifdef __SYCL_DEVICE_ONLY__
236 return sycl::detail::spirv::SubgroupShuffleUp(x, delta);
240 throw runtime_error(
"Sub-groups are not supported on host device.",
241 PI_ERROR_INVALID_DEVICE);
246 #ifdef __SYCL_DEVICE_ONLY__
247 return sycl::detail::spirv::SubgroupShuffleXor(x, value);
251 throw runtime_error(
"Sub-groups are not supported on host device.",
252 PI_ERROR_INVALID_DEVICE);
258 #ifdef __SYCL_DEVICE_ONLY__
260 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
261 std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value, T>
262 load(CVT *cv_src)
const {
263 T *src =
const_cast<T *
>(cv_src);
265 sycl::detail::deduce_AS<T>::value,
266 sycl::access::decorated::yes>(src));
270 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
271 std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value, T>
272 load(CVT *cv_src)
const {
273 T *src =
const_cast<T *
>(cv_src);
276 return src[get_local_id()[0]];
278 auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(src);
282 auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(src);
286 assert(!
"Sub-group load() is supported for local or global pointers only.");
291 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
294 throw runtime_error(
"Sub-groups are not supported on host device.",
295 PI_ERROR_INVALID_DEVICE);
302 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value, T>
306 #ifdef __SYCL_DEVICE_ONLY__
308 return src.
get()[get_local_id()[0]];
310 return sycl::detail::sub_group::load(src);
314 throw runtime_error(
"Sub-groups are not supported on host device.",
315 PI_ERROR_INVALID_DEVICE);
322 sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value, T>
326 #ifdef __SYCL_DEVICE_ONLY__
327 return src.
get()[get_local_id()[0]];
330 throw runtime_error(
"Sub-groups are not supported on host device.",
331 PI_ERROR_INVALID_DEVICE);
334 #ifdef __SYCL_DEVICE_ONLY__
339 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
345 for (
int i = 0; i < N; ++i) {
346 res[i] = *(src.
get() + i * get_max_local_range()[0] + get_local_id()[0]);
354 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
355 N != 1 && N != 3 && N != 16,
360 return sycl::detail::sub_group::load<N, T>(src);
366 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
369 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
370 multi_ptr<T, Space, IsDecorated> src =
372 return {sycl::detail::sub_group::load<8, T>(src),
373 sycl::detail::sub_group::load<8, T>(src +
374 8 * get_max_local_range()[0])};
380 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
383 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
384 multi_ptr<T, Space, IsDecorated> src =
387 sycl::detail::sub_group::load<1, T>(src),
388 sycl::detail::sub_group::load<2, T>(src + get_max_local_range()[0])};
394 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
397 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
398 multi_ptr<T, Space, IsDecorated> src =
400 return sycl::detail::sub_group::load(src);
407 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value,
411 throw runtime_error(
"Sub-groups are not supported on host device.",
412 PI_ERROR_INVALID_DEVICE);
419 sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value,
424 #ifdef __SYCL_DEVICE_ONLY__
426 for (
int i = 0; i < N; ++i) {
427 res[i] = *(src.
get() + i * get_max_local_range()[0] + get_local_id()[0]);
432 throw runtime_error(
"Sub-groups are not supported on host device.",
433 PI_ERROR_INVALID_DEVICE);
437 #ifdef __SYCL_DEVICE_ONLY__
439 template <
typename T>
440 std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value>
443 sycl::detail::deduce_AS<T>::value,
444 sycl::access::decorated::yes>(dst),
449 template <
typename T>
450 std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value>
454 dst[get_local_id()[0]] = x;
456 auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(dst);
462 auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(dst);
469 !
"Sub-group store() is supported for local or global pointers only.");
474 template <
typename T>
void store(T *dst,
const T &x)
const {
477 throw runtime_error(
"Sub-groups are not supported on host device.",
478 PI_ERROR_INVALID_DEVICE);
485 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
487 #ifdef __SYCL_DEVICE_ONLY__
489 dst.
get()[get_local_id()[0]] = x;
491 sycl::detail::sub_group::store(dst, x);
496 throw runtime_error(
"Sub-groups are not supported on host device.",
497 PI_ERROR_INVALID_DEVICE);
504 sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value>
506 #ifdef __SYCL_DEVICE_ONLY__
507 dst.
get()[get_local_id()[0]] = x;
511 throw runtime_error(
"Sub-groups are not supported on host device.",
512 PI_ERROR_INVALID_DEVICE);
516 #ifdef __SYCL_DEVICE_ONLY__
521 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
523 for (
int i = 0; i < N; ++i) {
524 *(dst.
get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
531 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
532 N != 1 && N != 3 && N != 16>
534 sycl::detail::sub_group::store(dst, x);
540 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
542 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, 1> &x)
const {
543 sycl::detail::sub_group::store(dst, x);
549 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
551 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, 3> &x)
const {
552 store<1, T, Space, DecorateAddress>(dst, x.s0());
553 store<2, T, Space, DecorateAddress>(dst + get_max_local_range()[0],
560 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value &&
562 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, 16> &x)
const {
563 store<8, T, Space, DecorateAddress>(dst, x.lo());
564 store<8, T, Space, DecorateAddress>(dst + 8 * get_max_local_range()[0],
573 sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>
577 throw runtime_error(
"Sub-groups are not supported on host device.",
578 PI_ERROR_INVALID_DEVICE);
585 sycl::detail::sub_group::AcceptableForLocalLoadStore<T, Space>::value>
587 #ifdef __SYCL_DEVICE_ONLY__
588 for (
int i = 0; i < N; ++i) {
589 *(dst.
get() + i * get_max_local_range()[0] + get_local_id()[0]) = x[i];
594 throw runtime_error(
"Sub-groups are not supported on host device.",
595 PI_ERROR_INVALID_DEVICE);
601 #ifdef __SYCL_DEVICE_ONLY__
609 throw runtime_error(
"Sub-groups are not supported on host device.",
610 PI_ERROR_INVALID_DEVICE);
615 "Use barrier() without a fence_space instead.")
617 #ifdef __SYCL_DEVICE_ONLY__
623 throw runtime_error(
"Sub-groups are not supported on host device.",
624 PI_ERROR_INVALID_DEVICE);
629 template <
typename T>
631 "sycl::ext::oneapi::broadcast instead.")
633 #ifdef __SYCL_DEVICE_ONLY__
634 return sycl::detail::spirv::GroupBroadcast<sub_group>(x, local_id);
638 throw runtime_error(
"Sub-groups are not supported on host device.",
639 PI_ERROR_INVALID_DEVICE);
643 template <
typename T,
class BinaryOperation>
645 "sycl::ext::oneapi::reduce instead.")
647 #ifdef __SYCL_DEVICE_ONLY__
650 typename sycl::detail::GroupOpTag<T>::type(), x, op);
654 throw runtime_error(
"Sub-groups are not supported on host device.",
655 PI_ERROR_INVALID_DEVICE);
659 template <
typename T,
class BinaryOperation>
661 "sycl::ext::oneapi::reduce instead.")
663 #ifdef __SYCL_DEVICE_ONLY__
664 return op(init,
reduce(x, op));
669 throw runtime_error(
"Sub-groups are not supported on host device.",
670 PI_ERROR_INVALID_DEVICE);
674 template <
typename T,
class BinaryOperation>
676 "sycl::ext::oneapi::exclusive_scan instead.")
678 #ifdef __SYCL_DEVICE_ONLY__
681 typename sycl::detail::GroupOpTag<T>::type(), x, op);
685 throw runtime_error(
"Sub-groups are not supported on host device.",
686 PI_ERROR_INVALID_DEVICE);
690 template <
typename T,
class BinaryOperation>
692 "sycl::ext::oneapi::exclusive_scan instead.")
694 BinaryOperation op)
const {
695 #ifdef __SYCL_DEVICE_ONLY__
696 if (get_local_id().
get(0) == 0) {
699 T scan = exclusive_scan(x, op);
700 if (get_local_id().
get(0) == 0) {
708 throw runtime_error(
"Sub-groups are not supported on host device.",
709 PI_ERROR_INVALID_DEVICE);
713 template <
typename T,
class BinaryOperation>
715 "sycl::ext::oneapi::inclusive_scan instead.")
717 #ifdef __SYCL_DEVICE_ONLY__
720 typename sycl::detail::GroupOpTag<T>::type(), x, op);
724 throw runtime_error(
"Sub-groups are not supported on host device.",
725 PI_ERROR_INVALID_DEVICE);
729 template <
typename T,
class BinaryOperation>
731 "sycl::ext::oneapi::inclusive_scan instead.")
734 #ifdef __SYCL_DEVICE_ONLY__
735 if (get_local_id().
get(0) == 0) {
738 return inclusive_scan(x, op);
743 throw runtime_error(
"Sub-groups are not supported on host device.",
744 PI_ERROR_INVALID_DEVICE);
748 linear_id_type get_group_linear_range()
const {
749 #ifdef __SYCL_DEVICE_ONLY__
750 return static_cast<linear_id_type
>(get_group_range()[0]);
752 throw runtime_error(
"Sub-groups are not supported on host device.",
753 PI_ERROR_INVALID_DEVICE);
758 #ifdef __SYCL_DEVICE_ONLY__
759 return static_cast<linear_id_type
>(get_local_range()[0]);
761 throw runtime_error(
"Sub-groups are not supported on host device.",
762 PI_ERROR_INVALID_DEVICE);
766 bool leader()
const {
767 #ifdef __SYCL_DEVICE_ONLY__
770 throw runtime_error(
"Sub-groups are not supported on host device.",
771 PI_ERROR_INVALID_DEVICE);
776 template <
int dimensions>
friend class sycl::nd_item;
783 "use sycl::ext::oneapi::experimental::this_sub_group() instead")
785 #ifdef __SYCL_DEVICE_ONLY__
788 throw runtime_error(
"Sub-groups are not supported on host device.",
789 PI_ERROR_INVALID_DEVICE);
Provides constructors for address space qualified and non address space qualified pointers to allow i...
std::conditional_t< is_decorated, decorated_type *, std::add_pointer_t< value_type > > pointer
decorated_type * get_decorated() const
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
#define __SYCL_INLINE_VER_NAMESPACE(X)
#define __SYCL_DEPRECATED(message)
__ESIMD_API void barrier()
Generic work-group barrier.
constexpr tuple_element< I, tuple< Types... > >::type & get(sycl::detail::tuple< Types... > &Arg) noexcept
select_cl_scalar_integral_unsigned_t< T > SelectBlockT
bool_constant<!std::is_same< void, SelectBlockT< T > >::value &&Space==access::address_space::local_space > AcceptableForLocalLoadStore
bool_constant<!std::is_same< void, SelectBlockT< T > >::value &&Space==access::address_space::global_space > AcceptableForGlobalLoadStore
conditional_t< TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::type, conditional_t< TryToGetPointerT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetPointerVecT< SelectMatchingOpenCLType_t< T > >::type, SelectMatchingOpenCLType_t< T > >> ConvertToOpenCLType_t
select_apply_cl_scalar_t< T, sycl::cl_uchar, sycl::cl_ushort, sycl::cl_uint, sycl::cl_ulong > select_cl_scalar_integral_unsigned_t
std::integral_constant< bool, V > bool_constant
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
multi_ptr< T, Space, IsDecorated > GetUnqualMultiPtr(const multi_ptr< CVT, Space, IsDecorated > &Mptr)
typename std::enable_if< B, T >::type enable_if_t
size_t get_local_linear_range(Group g)
Group::linear_id_type get_local_linear_id(Group g)
sub_group this_sub_group()
sycl::detail::enable_if_t< sycl::detail::is_scalar_arithmetic< T >::value, T > EnableIfIsScalarArithmetic
typename remove_decoration< T >::type remove_decoration_t
---— Error handling, matching OpenCL plugin semantics.
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
_Tp reduce(const simd< _Tp, _Abi > &, _BinaryOp=_BinaryOp())
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
T shuffle(T x, id_type local_id) const
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
linear_id_type get_group_linear_id() const
range_type get_group_range() const
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > src) const
T shuffle_up(T x, uint32_t delta) const
void store(T *dst, const T &x) const
id_type get_group_id() const
range_type get_local_range() const
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
T shuffle_down(T x, uint32_t delta) const
range_type get_max_local_range() const
linear_id_type get_local_linear_id() const
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
std::enable_if_t< sycl::detail::is_scalar_arithmetic< T >::value, T > EnableIfIsScalarArithmetic
id_type get_local_id() const
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
T shuffle_xor(T x, id_type value) const