34 #include <type_traits>
37 inline namespace _V1 {
43 #ifdef __SYCL_DEVICE_ONLY__
44 constexpr uint32_t flags =
45 static_cast<uint32_t
>(
61 template <
typename T,
int Dimensions = 1>
62 class __SYCL_TYPE(private_memory) private_memory {
66 #ifndef __SYCL_DEVICE_ONLY__
69 Val.reset(
new T[G.get_local_range().size()]);
75 T &operator()(
const h_item<Dimensions> &Id) {
76 #ifndef __SYCL_DEVICE_ONLY__
79 size_t Ind = Id.get_physical_local().get_linear_id();
80 return Val.get()[Ind];
88 #ifdef __SYCL_DEVICE_ONLY__
95 std::unique_ptr<T[]> Val;
103 template <
int Dimensions = 1>
class __SYCL_TYPE(group)
group {
105 #ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
106 using id_type = id<Dimensions>;
107 using range_type = range<Dimensions>;
108 using linear_id_type = size_t;
113 sycl::memory_scope::work_group;
118 id<
Dimensions> get_id()
const {
return index; }
121 size_t get_id(
int dimension)
const {
return index[dimension]; }
123 id<Dimensions> get_group_id()
const {
return index; }
125 size_t get_group_id(
int dimension)
const {
return index[dimension]; }
128 "sycl::group::get_max_local_range() instead")
129 range<
Dimensions> get_global_range()
const {
return globalRange; }
131 size_t get_global_range(
int dimension)
const {
132 return globalRange[dimension];
135 id<Dimensions> get_local_id()
const {
136 #ifdef __SYCL_DEVICE_ONLY__
137 return __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>();
140 "get_local_id() is not implemented on host");
144 size_t get_local_id(
int dimention)
const {
return get_local_id()[dimention]; }
147 return get_local_linear_id_impl<Dimensions>();
150 range<Dimensions> get_local_range()
const {
return localRange; }
152 size_t get_local_range(
int dimension)
const {
return localRange[dimension]; }
155 return get_local_linear_range_impl();
158 range<Dimensions> get_group_range()
const {
return groupRange; }
160 size_t get_group_range(
int dimension)
const {
161 return get_group_range()[dimension];
164 size_t get_group_linear_range()
const {
165 return get_group_linear_range_impl();
168 range<Dimensions> get_max_local_range()
const {
return get_local_range(); }
170 size_t operator[](
int dimension)
const {
return index[dimension]; }
173 size_t get_linear_id()
const {
return get_group_linear_id(); }
175 size_t get_group_linear_id()
const {
return get_group_linear_id_impl(); }
179 template <
typename WorkItemFunctionT>
180 void parallel_for_work_item(WorkItemFunctionT Func)
const {
184 #ifdef __SYCL_DEVICE_ONLY__
185 range<Dimensions> GlobalSize{
186 __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
187 range<Dimensions> LocalSize{
188 __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
189 id<Dimensions> GlobalId{
190 __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
191 id<Dimensions> LocalId{
192 __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
198 item<Dimensions, false> GlobalItem =
199 detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
200 item<Dimensions, false> LocalItem =
201 detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
202 h_item<Dimensions> HItem =
203 detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
207 id<Dimensions> GroupStartID = index * id<Dimensions>{localRange};
211 localRange, [&](
const id<Dimensions> &LocalID) {
212 item<Dimensions, false> GlobalItem =
213 detail::Builder::createItem<Dimensions, false>(
214 globalRange, GroupStartID + LocalID);
215 item<Dimensions, false> LocalItem =
216 detail::Builder::createItem<Dimensions, false>(localRange,
218 h_item<Dimensions> HItem =
219 detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
230 template <
typename WorkItemFunctionT>
231 void parallel_for_work_item(range<Dimensions> flexibleRange,
232 WorkItemFunctionT Func)
const {
234 #ifdef __SYCL_DEVICE_ONLY__
235 range<Dimensions> GlobalSize{
236 __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
237 range<Dimensions> LocalSize{
238 __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
239 id<Dimensions> GlobalId{
240 __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
241 id<Dimensions> LocalId{
242 __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
244 item<Dimensions, false> GlobalItem =
245 detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
246 item<Dimensions, false> LocalItem =
247 detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
248 h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
249 GlobalItem, LocalItem, flexibleRange);
255 LocalId, LocalSize, flexibleRange,
256 [&](
const id<Dimensions> &LogicalLocalID) {
257 HItem.setLogicalLocalID(LogicalLocalID);
261 id<Dimensions> GroupStartID = index * localRange;
264 localRange, [&](
const id<Dimensions> &LocalID) {
265 item<Dimensions, false> GlobalItem =
266 detail::Builder::createItem<Dimensions, false>(
267 globalRange, GroupStartID + LocalID);
268 item<Dimensions, false> LocalItem =
269 detail::Builder::createItem<Dimensions, false>(localRange,
271 h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
272 GlobalItem, LocalItem, flexibleRange);
275 LocalID, localRange, flexibleRange,
276 [&](
const id<Dimensions> &LogicalLocalID) {
277 HItem.setLogicalLocalID(LogicalLocalID);
287 template <access::mode accessMode = access::mode::read_write>
311 template <
typename dataT>
313 std::enable_if_t<!detail::is_bool<dataT>::value,
314 device_event> async_work_group_copy(
local_ptr<dataT> dest,
317 size_t srcStride)
const {
321 return device_event(E);
329 template <
typename dataT>
331 std::enable_if_t<!detail::is_bool<dataT>::value,
332 device_event> async_work_group_copy(
global_ptr<dataT> dest,
340 return device_event(E);
349 template <
typename DestDataT,
typename SrcDataT>
350 std::enable_if_t<!detail::is_bool<DestDataT>::value &&
351 std::is_same_v<std::remove_const_t<SrcDataT>, DestDataT>,
353 async_work_group_copy(decorated_local_ptr<DestDataT> dest,
354 decorated_global_ptr<SrcDataT> src,
size_t numElements,
355 size_t srcStride)
const {
359 return device_event(E);
368 template <
typename DestDataT,
typename SrcDataT>
369 std::enable_if_t<!detail::is_bool<DestDataT>::value &&
370 std::is_same_v<std::remove_const_t<SrcDataT>, DestDataT>,
372 async_work_group_copy(decorated_global_ptr<DestDataT> dest,
373 decorated_local_ptr<SrcDataT> src,
size_t numElements,
374 size_t destStride)
const {
378 return device_event(E);
386 template <
typename T, access::address_space DestS, access::address_space SrcS>
389 detail::is_scalar_bool<T>::value,
390 device_event> async_work_group_copy(
multi_ptr<T, DestS,
397 size_t Stride)
const {
398 static_assert(
sizeof(
bool) ==
sizeof(uint8_t),
399 "Async copy to/from bool memory is not supported.");
400 auto DestP = multi_ptr<uint8_t, DestS, access::decorated::legacy>(
401 reinterpret_cast<uint8_t *
>(Dest.get()));
402 auto SrcP = multi_ptr<uint8_t, SrcS, access::decorated::legacy>(
403 reinterpret_cast<uint8_t *
>(Src.get()));
404 return async_work_group_copy(DestP, SrcP, NumElements, Stride);
412 template <
typename T, access::address_space DestS, access::address_space SrcS>
415 detail::is_vector_bool<T>::value,
416 device_event> async_work_group_copy(
multi_ptr<T, DestS,
423 size_t Stride)
const {
424 static_assert(
sizeof(
bool) ==
sizeof(uint8_t),
425 "Async copy to/from bool memory is not supported.");
426 using VecT = detail::change_base_type_t<T, uint8_t>;
427 auto DestP = address_space_cast<DestS, access::decorated::legacy>(
428 reinterpret_cast<VecT *
>(Dest.get()));
429 auto SrcP = address_space_cast<SrcS, access::decorated::legacy>(
430 reinterpret_cast<VecT *
>(Src.get()));
431 return async_work_group_copy(DestP, SrcP, NumElements, Stride);
441 std::enable_if_t<detail::is_scalar_bool<DestT>::value &&
442 std::is_same_v<std::remove_const_t<SrcT>, DestT>,
444 async_work_group_copy(multi_ptr<DestT, DestS, access::decorated::yes> Dest,
445 multi_ptr<SrcT, SrcS, access::decorated::yes> Src,
446 size_t NumElements,
size_t Stride)
const {
447 static_assert(
sizeof(
bool) ==
sizeof(uint8_t),
448 "Async copy to/from bool memory is not supported.");
450 std::conditional_t<std::is_const_v<SrcT>,
const uint8_t, uint8_t>;
451 auto DestP = multi_ptr<uint8_t, DestS, access::decorated::yes>(
454 Dest.get_decorated()));
455 auto SrcP = multi_ptr<QualSrcT, SrcS, access::decorated::yes>(
458 Src.get_decorated()));
459 return async_work_group_copy(DestP, SrcP, NumElements, Stride);
469 std::enable_if_t<detail::is_vector_bool<DestT>::value &&
470 std::is_same_v<std::remove_const_t<SrcT>, DestT>,
472 async_work_group_copy(multi_ptr<DestT, DestS, access::decorated::yes> Dest,
473 multi_ptr<SrcT, SrcS, access::decorated::yes> Src,
474 size_t NumElements,
size_t Stride)
const {
475 static_assert(
sizeof(
bool) ==
sizeof(uint8_t),
476 "Async copy to/from bool memory is not supported.");
477 using VecT = detail::change_base_type_t<DestT, uint8_t>;
479 std::conditional_t<std::is_const_v<SrcT>, std::add_const_t<VecT>, VecT>;
480 auto DestP = multi_ptr<VecT, DestS, access::decorated::yes>(
483 Dest.get_decorated()));
484 auto SrcP = multi_ptr<QualSrcVecT, SrcS, access::decorated::yes>(
487 Src.get_decorated()));
488 return async_work_group_copy(DestP, SrcP, NumElements, Stride);
496 template <
typename dataT>
500 size_t numElements)
const {
501 return async_work_group_copy(dest, src, numElements, 1);
509 template <
typename dataT>
513 size_t numElements)
const {
514 return async_work_group_copy(dest, src, numElements, 1);
523 template <
typename DestDataT,
typename SrcDataT>
524 typename std::enable_if_t<
525 std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>, device_event>
526 async_work_group_copy(decorated_local_ptr<DestDataT> dest,
527 decorated_global_ptr<SrcDataT> src,
528 size_t numElements)
const {
529 return async_work_group_copy(dest, src, numElements, 1);
538 template <
typename DestDataT,
typename SrcDataT>
539 typename std::enable_if_t<
540 std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>, device_event>
541 async_work_group_copy(decorated_global_ptr<DestDataT> dest,
542 decorated_local_ptr<SrcDataT> src,
543 size_t numElements)
const {
544 return async_work_group_copy(dest, src, numElements, 1);
547 template <
typename... eventTN>
void wait_for(eventTN... Events)
const {
548 waitForHelper(Events...);
551 bool operator==(
const group<Dimensions> &rhs)
const {
552 bool Result = (rhs.globalRange == globalRange) &&
553 (rhs.localRange == localRange) && (rhs.index == index);
555 "inconsistent group class fields");
559 bool operator!=(
const group<Dimensions> &rhs)
const {
560 return !((*this) == rhs);
564 range<Dimensions> globalRange;
565 range<Dimensions> localRange;
566 range<Dimensions> groupRange;
567 id<Dimensions> index;
569 template <
int dims = Dimensions>
570 typename std::enable_if_t<(dims == 1),
size_t>
571 get_local_linear_id_impl()
const {
572 id<Dimensions> localId = get_local_id();
576 template <
int dims = Dimensions>
577 typename std::enable_if_t<(dims == 2),
size_t>
578 get_local_linear_id_impl()
const {
579 id<Dimensions> localId = get_local_id();
580 return localId[0] * localRange[1] + localId[1];
583 template <
int dims = Dimensions>
584 typename std::enable_if_t<(dims == 3),
size_t>
585 get_local_linear_id_impl()
const {
586 id<Dimensions> localId = get_local_id();
587 return (localId[0] * localRange[1] * localRange[2]) +
588 (localId[1] * localRange[2]) + localId[2];
591 template <
int dims = Dimensions>
592 typename std::enable_if_t<(dims == 1),
size_t>
593 get_local_linear_range_impl()
const {
594 auto localRange = get_local_range();
595 return localRange[0];
598 template <
int dims = Dimensions>
599 typename std::enable_if_t<(dims == 2),
size_t>
600 get_local_linear_range_impl()
const {
601 auto localRange = get_local_range();
602 return localRange[0] * localRange[1];
605 template <
int dims = Dimensions>
606 typename std::enable_if_t<(dims == 3),
size_t>
607 get_local_linear_range_impl()
const {
608 auto localRange = get_local_range();
609 return localRange[0] * localRange[1] * localRange[2];
612 template <
int dims = Dimensions>
613 typename std::enable_if_t<(dims == 1),
size_t>
614 get_group_linear_range_impl()
const {
615 auto groupRange = get_group_range();
616 return groupRange[0];
619 template <
int dims = Dimensions>
620 typename std::enable_if_t<(dims == 2),
size_t>
621 get_group_linear_range_impl()
const {
622 auto groupRange = get_group_range();
623 return groupRange[0] * groupRange[1];
626 template <
int dims = Dimensions>
627 typename std::enable_if_t<(dims == 3),
size_t>
628 get_group_linear_range_impl()
const {
629 auto groupRange = get_group_range();
630 return groupRange[0] * groupRange[1] * groupRange[2];
633 template <
int dims = Dimensions>
634 typename std::enable_if_t<(dims == 1),
size_t>
635 get_group_linear_id_impl()
const {
639 template <
int dims = Dimensions>
640 typename std::enable_if_t<(dims == 2),
size_t>
641 get_group_linear_id_impl()
const {
642 return index[0] * groupRange[1] + index[1];
655 template <
int dims = Dimensions>
656 typename std::enable_if_t<(dims == 3),
size_t>
657 get_group_linear_id_impl()
const {
658 return (index[0] * groupRange[1] * groupRange[2]) +
659 (index[1] * groupRange[2]) + index[2];
662 void waitForHelper()
const {}
664 void waitForHelper(device_event Event)
const { Event.wait(); }
666 template <
typename T,
typename... Ts>
667 void waitForHelper(T E, Ts... Es)
const {
669 waitForHelper(Es...);
673 friend class detail::Builder;
674 group(
const range<Dimensions> &G,
const range<Dimensions> &L,
675 const range<Dimensions> GroupRange,
const id<Dimensions> &I)
676 : globalRange(
G), localRange(L), groupRange(GroupRange), index(I) {}
auto convertToOpenCLType(T &&x)
auto get_local_linear_id(Group g)
auto get_local_linear_range(Group g)
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
fence_scope
The scope that fence() operation should apply to.
@ group
Wait until all previous memory transactions from this thread are observed within the local thread-gro...
bool operator==(const cache_config &lhs, const cache_config &rhs)
bool operator!=(const cache_config &lhs, const cache_config &rhs)
T & operator[](std::ptrdiff_t idx) const noexcept
class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
signed char __SYCL2020_DEPRECATED
multi_ptr< ElementType, access::address_space::global_space, IsDecorated > global_ptr
std::conditional_t< is_decorated, decorated_type *, std::add_pointer_t< value_type > > pointer
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
multi_ptr< ElementType, access::address_space::local_space, IsDecorated > local_ptr
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
__SYCL_CONVERGENT__ __DPCPP_SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, const dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
static __SYCL_ALWAYS_INLINE void iterate(const LoopBoundTy< NDims > &UpperBound, FuncTy f)
Generates ND loop nest with {0,..0} .