23 #ifdef __SYCL_DEVICE_ONLY__
29 #include <type_traits>
32 inline namespace _V1 {
46 static_assert(is_multi_ptr_v<MultiPtrTy>);
48 using DecoratedPtrTy = decltype(DecoratedPtr);
53 #ifdef __SYCL_DEVICE_ONLY__
56 deduce_AS<DecoratedPtrTy>::value>::type *;
58 using ResultTy = TargetElemTy *;
60 return reinterpret_cast<ResultTy
>(DecoratedPtr);
63 template <
typename T, access::address_space Space>
65 std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
68 template <
typename T, access::address_space Space>
70 std::bool_constant<!std::is_same_v<void, SelectBlockT<T>> &&
73 #ifdef __SYCL_DEVICE_ONLY__
80 return sycl::bit_cast<T>(Ret);
86 using BlockT = SelectBlockT<T>;
87 using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
90 return sycl::bit_cast<typename vec<T, N>::vector_t>(Ret);
96 using BlockT = SelectBlockT<T>;
99 sycl::bit_cast<BlockT>(x));
105 using BlockT = SelectBlockT<T>;
106 using VecT = sycl::detail::ConvertToOpenCLType_t<vec<BlockT, N>>;
109 sycl::bit_cast<VecT>(x));
119 inline multi_ptr<T, Space, IsDecorated>
121 if constexpr (IsDecorated == access::decorated::legacy) {
128 Mptr.get_decorated())};
135 namespace ext::oneapi::this_work_item {
151 #ifdef __SYCL_DEVICE_ONLY__
152 return __spirv_SubgroupLocalInvocationId();
155 "Sub-groups are not supported on host.");
160 #ifdef __SYCL_DEVICE_ONLY__
164 "Sub-groups are not supported on host.");
169 #ifdef __SYCL_DEVICE_ONLY__
170 return __spirv_SubgroupSize();
173 "Sub-groups are not supported on host.");
178 #ifdef __SYCL_DEVICE_ONLY__
179 return __spirv_SubgroupMaxSize();
182 "Sub-groups are not supported on host.");
187 #ifdef __SYCL_DEVICE_ONLY__
188 return __spirv_SubgroupId();
191 "Sub-groups are not supported on host.");
196 #ifdef __SYCL_DEVICE_ONLY__
200 "Sub-groups are not supported on host.");
205 #ifdef __SYCL_DEVICE_ONLY__
206 return __spirv_NumSubgroups();
209 "Sub-groups are not supported on host.");
213 template <
typename T>
215 std::enable_if_t<sycl::detail::is_scalar_arithmetic<T>::value, T>;
219 template <
typename T>
222 #ifdef __SYCL_DEVICE_ONLY__
223 return sycl::detail::spirv::Shuffle(*
this,
x, local_id);
228 "Sub-groups are not supported on host.");
232 template <
typename T>
235 #ifdef __SYCL_DEVICE_ONLY__
236 return sycl::detail::spirv::ShuffleDown(*
this,
x, delta);
241 "Sub-groups are not supported on host.");
245 template <
typename T>
248 #ifdef __SYCL_DEVICE_ONLY__
249 return sycl::detail::spirv::ShuffleUp(*
this,
x, delta);
254 "Sub-groups are not supported on host.");
258 template <
typename T>
261 #ifdef __SYCL_DEVICE_ONLY__
262 return sycl::detail::spirv::ShuffleXor(*
this,
x, value);
267 "Sub-groups are not supported on host.");
273 #ifdef __SYCL_DEVICE_ONLY__
275 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
276 std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value, T>
277 load(CVT *cv_src)
const {
278 T *src =
const_cast<T *
>(cv_src);
280 sycl::detail::deduce_AS<T>::value,
281 sycl::access::decorated::yes>(src));
285 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
286 std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value, T>
287 load(CVT *cv_src)
const {
288 T *src =
const_cast<T *
>(cv_src);
290 #if defined(__NVPTX__) || defined(__AMDGCN__)
293 auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(src);
297 auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(src);
301 assert(!
"Sub-group load() is supported for local or global pointers only.");
306 template <
typename CVT,
typename T = std::remove_cv_t<CVT>>
310 "Sub-groups are not supported on host.");
321 #ifdef __SYCL_DEVICE_ONLY__
322 #if defined(__NVPTX__) || defined(__AMDGCN__)
325 return sycl::detail::sub_group::load(src);
330 "Sub-groups are not supported on host.");
341 #ifdef __SYCL_DEVICE_ONLY__
346 "Sub-groups are not supported on host.");
349 #ifdef __SYCL_DEVICE_ONLY__
350 #if defined(__NVPTX__) || defined(__AMDGCN__)
360 for (
int i = 0; i < N; ++i) {
370 N != 1 && N != 3 && N != 16,
372 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
373 multi_ptr<T, Space, IsDecorated> src =
375 return sycl::detail::sub_group::load<N, T>(src);
384 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
385 multi_ptr<T, Space, IsDecorated> src =
387 return {sycl::detail::sub_group::load<8, T>(src),
388 sycl::detail::sub_group::load<8, T>(src +
398 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
399 multi_ptr<T, Space, IsDecorated> src =
402 sycl::detail::sub_group::load<1, T>(src),
412 load(
const multi_ptr<CVT, Space, IsDecorated> cv_src)
const {
413 multi_ptr<T, Space, IsDecorated> src =
415 return sycl::detail::sub_group::load(src);
427 "Sub-groups are not supported on host.");
439 #ifdef __SYCL_DEVICE_ONLY__
441 for (
int i = 0; i < N; ++i) {
448 "Sub-groups are not supported on host.");
452 #ifdef __SYCL_DEVICE_ONLY__
454 template <
typename T>
455 std::enable_if_t<!std::is_same<remove_decoration_t<T>, T>::value>
458 sycl::detail::deduce_AS<T>::value,
459 sycl::access::decorated::yes>(dst),
464 template <
typename T>
465 std::enable_if_t<std::is_same<remove_decoration_t<T>, T>::value>
468 #if defined(__NVPTX__) || defined(__AMDGCN__)
471 auto l = __SYCL_GenericCastToPtrExplicit_ToLocal<T>(dst);
477 auto g = __SYCL_GenericCastToPtrExplicit_ToGlobal<T>(dst);
484 !
"Sub-group store() is supported for local or global pointers only.");
489 template <
typename T>
void store(T *dst,
const T &
x)
const {
493 "Sub-groups are not supported on host.");
502 #ifdef __SYCL_DEVICE_ONLY__
503 #if defined(__NVPTX__) || defined(__AMDGCN__)
506 sycl::detail::sub_group::store(dst,
x);
512 "Sub-groups are not supported on host.");
521 #ifdef __SYCL_DEVICE_ONLY__
527 "Sub-groups are not supported on host.");
531 #ifdef __SYCL_DEVICE_ONLY__
532 #if defined(__NVPTX__) || defined(__AMDGCN__)
538 for (
int i = 0; i < N; ++i) {
547 N != 1 && N != 3 && N != 16>
548 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, N> &
x)
const {
549 sycl::detail::sub_group::store(dst,
x);
557 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, 1> &
x)
const {
558 sycl::detail::sub_group::store(dst,
x);
566 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, 3> &
x)
const {
567 store<1, T, Space, DecorateAddress>(dst,
x.s0());
577 store(multi_ptr<T, Space, DecorateAddress> dst,
const vec<T, 16> &
x)
const {
578 store<8, T, Space, DecorateAddress>(dst,
x.lo());
593 "Sub-groups are not supported on host.");
602 #ifdef __SYCL_DEVICE_ONLY__
603 for (
int i = 0; i < N; ++i) {
610 "Sub-groups are not supported on host.");
616 "Sub-group barrier with no arguments is deprecated."
617 "Use sycl::group_barrier with the sub-group as the argument instead.")
619 #ifdef __SYCL_DEVICE_ONLY__
628 "Sub-groups are not supported on host.");
633 "Sub-group barrier accepting fence_space is deprecated."
634 "Use sycl::group_barrier with the sub-group as the argument instead.")
636 #ifdef __SYCL_DEVICE_ONLY__
643 "Sub-groups are not supported on host.");
647 #ifndef __INTEL_PREVIEW_BREAKING_CHANGES__
649 template <
typename T>
651 "sycl::ext::oneapi::broadcast instead.")
653 #ifdef __SYCL_DEVICE_ONLY__
654 return sycl::detail::spirv::GroupBroadcast<sub_group>(
x, local_id);
659 "Sub-groups are not supported on host.");
663 template <
typename T,
class BinaryOperation>
665 "sycl::ext::oneapi::reduce instead.")
667 #ifdef __SYCL_DEVICE_ONLY__
668 return sycl::detail::calc<__spv::GroupOperation::Reduce>(
669 typename sycl::detail::GroupOpTag<T>::type(), *
this,
x, op);
674 "Sub-groups are not supported on host.");
678 template <
typename T,
class BinaryOperation>
680 "sycl::ext::oneapi::reduce instead.")
682 #ifdef __SYCL_DEVICE_ONLY__
683 return op(init,
reduce(
x, op));
689 "Sub-groups are not supported on host.");
693 template <
typename T,
class BinaryOperation>
695 "sycl::ext::oneapi::exclusive_scan instead.")
697 #ifdef __SYCL_DEVICE_ONLY__
698 return sycl::detail::calc<__spv::GroupOperation::ExclusiveScan>(
699 typename sycl::detail::GroupOpTag<T>::type(), *
this,
x, op);
704 "Sub-groups are not supported on host.");
708 template <
typename T,
class BinaryOperation>
710 "sycl::ext::oneapi::exclusive_scan instead.")
712 BinaryOperation op)
const {
713 #ifdef __SYCL_DEVICE_ONLY__
717 T scan = exclusive_scan(
x, op);
727 "Sub-groups are not supported on host.");
731 template <
typename T,
class BinaryOperation>
733 "sycl::ext::oneapi::inclusive_scan instead.")
735 #ifdef __SYCL_DEVICE_ONLY__
736 return sycl::detail::calc<__spv::GroupOperation::InclusiveScan>(
737 typename sycl::detail::GroupOpTag<T>::type(), *
this,
x, op);
742 "Sub-groups are not supported on host.");
746 template <
typename T,
class BinaryOperation>
748 "sycl::ext::oneapi::inclusive_scan instead.")
751 #ifdef __SYCL_DEVICE_ONLY__
755 return inclusive_scan(
x, op);
761 "Sub-groups are not supported on host.");
767 #ifdef __SYCL_DEVICE_ONLY__
771 "Sub-groups are not supported on host.");
776 #ifdef __SYCL_DEVICE_ONLY__
780 "Sub-groups are not supported on host.");
784 bool leader()
const {
785 #ifdef __SYCL_DEVICE_ONLY__
789 "Sub-groups are not supported on host.");
795 #ifdef __SYCL_DEVICE_ONLY__
796 return lhs.get_group_id() == rhs.get_group_id();
801 "Sub-groups are not supported on host.");
806 #ifdef __SYCL_DEVICE_ONLY__
807 return !(lhs == rhs);
812 "Sub-groups are not supported on host.");
Identifies an instance of the function object executing at each point in an nd_range.
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
__ESIMD_API void barrier()
Generic work-group barrier.
std::bool_constant<!std::is_same_v< void, SelectBlockT< T > > &&Space==access::address_space::local_space > AcceptableForLocalLoadStore
select_cl_scalar_integral_unsigned_t< T > SelectBlockT
auto convertToBlockPtr(MultiPtrTy MultiPtr)
std::bool_constant<!std::is_same_v< void, SelectBlockT< T > > &&Space==access::address_space::global_space > AcceptableForGlobalLoadStore
auto convertToOpenCLType(T &&x)
auto get_local_linear_range(Group g)
select_apply_cl_scalar_t< T, sycl::opencl::cl_uchar, sycl::opencl::cl_ushort, sycl::opencl::cl_uint, sycl::opencl::cl_ulong > select_cl_scalar_integral_unsigned_t
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
multi_ptr< T, Space, IsDecorated > GetUnqualMultiPtr(const multi_ptr< CVT, Space, IsDecorated > &Mptr)
bool operator==(const cache_config &lhs, const cache_config &rhs)
sycl::sub_group get_sub_group()
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
typename remove_decoration< T >::type remove_decoration_t
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
_Tp reduce(const simd< _Tp, _Abi > &, _BinaryOp=_BinaryOp())
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const
linear_id_type get_group_linear_id() const
T shuffle_up(T x, uint32_t delta) const
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
__SYCL_DEPRECATED("Sub-group barrier with no arguments is deprecated." "Use sycl::group_barrier with the sub-group as the argument instead.") void barrier() const
range_type get_max_local_range() const
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > src) const
id_type get_group_id() const
range_type get_local_range() const
static constexpr int dimensions
static constexpr sycl::memory_scope fence_scope
__SYCL_DEPRECATED("Sub-group barrier accepting fence_space is deprecated." "Use sycl::group_barrier with the sub-group as the argument instead.") void barrier(access
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const vec< T, N > &x) const
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, T > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
id_type get_local_id() const
void store(T *dst, const T &x) const
friend bool operator!=(const sub_group &lhs, const sub_group &rhs)
T shuffle(T x, id_type local_id) const
linear_id_type get_local_linear_id() const
T shuffle_xor(T x, id_type value) const
T shuffle_down(T x, uint32_t delta) const
std::enable_if_t< sycl::detail::sub_group::AcceptableForLocalLoadStore< T, Space >::value, vec< T, N > > load(const multi_ptr< CVT, Space, IsDecorated > cv_src) const
std::enable_if_t< sycl::detail::is_scalar_arithmetic< T >::value, T > EnableIfIsScalarArithmetic
range_type get_group_range() const
std::enable_if_t< sycl::detail::sub_group::AcceptableForGlobalLoadStore< T, Space >::value > store(multi_ptr< T, Space, DecorateAddress > dst, const T &x) const