25 #include <type_traits>
34 #ifdef __SYCL_DEVICE_ONLY__
35 constexpr uint32_t flags =
36 static_cast<uint32_t
>(
52 template <
typename T,
int Dimensions = 1>
53 class __SYCL_TYPE(private_memory) private_memory {
56 private_memory(
const group<Dimensions> &G) {
57 #ifndef __SYCL_DEVICE_ONLY__
60 Val.reset(
new T[G.get_local_range().size()]);
66 T &operator()(
const h_item<Dimensions> &Id) {
67 #ifndef __SYCL_DEVICE_ONLY__
70 size_t Ind = Id.get_physical_local().get_linear_id();
71 return Val.get()[Ind];
79 #ifdef __SYCL_DEVICE_ONLY__
86 std::unique_ptr<T[]> Val;
94 template <
int Dimensions = 1>
class __SYCL_TYPE(group)
group {
96 #ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__
97 using id_type = id<Dimensions>;
98 using range_type = range<Dimensions>;
99 using linear_id_type = size_t;
104 sycl::memory_scope::work_group;
109 id<
Dimensions> get_id()
const {
return index; }
112 size_t get_id(
int dimension)
const {
return index[dimension]; }
114 id<Dimensions> get_group_id()
const {
return index; }
116 size_t get_group_id(
int dimension)
const {
return index[dimension]; }
119 "sycl::group::get_max_local_range() instead")
120 range<
Dimensions> get_global_range()
const {
return globalRange; }
122 size_t get_global_range(
int dimension)
const {
123 return globalRange[dimension];
126 id<Dimensions> get_local_id()
const {
127 #ifdef __SYCL_DEVICE_ONLY__
128 return __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>();
130 throw runtime_error(
"get_local_id() is not implemented on host device",
131 PI_ERROR_INVALID_DEVICE);
139 size_t get_local_id(
int dimention)
const {
return get_local_id()[dimention]; }
142 return get_local_linear_id_impl<Dimensions>();
145 range<Dimensions> get_local_range()
const {
return localRange; }
147 size_t get_local_range(
int dimension)
const {
return localRange[dimension]; }
150 return get_local_linear_range_impl();
153 range<Dimensions> get_group_range()
const {
return groupRange; }
155 size_t get_group_range(
int dimension)
const {
156 return get_group_range()[dimension];
159 size_t get_group_linear_range()
const {
160 return get_group_linear_range_impl();
163 range<Dimensions> get_max_local_range()
const {
return get_local_range(); }
165 size_t operator[](
int dimension)
const {
return index[dimension]; }
168 size_t get_linear_id()
const {
return get_group_linear_id(); }
170 size_t get_group_linear_id()
const {
return get_group_linear_id_impl(); }
174 template <
typename WorkItemFunctionT>
175 void parallel_for_work_item(WorkItemFunctionT Func)
const {
179 #ifdef __SYCL_DEVICE_ONLY__
180 range<Dimensions> GlobalSize{
181 __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
182 range<Dimensions> LocalSize{
183 __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
184 id<Dimensions> GlobalId{
185 __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
186 id<Dimensions> LocalId{
187 __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
193 item<Dimensions, false> GlobalItem =
194 detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
195 item<Dimensions, false> LocalItem =
196 detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
197 h_item<Dimensions> HItem =
198 detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
202 id<Dimensions> GroupStartID = index * localRange;
205 detail::NDLoop<Dimensions>::iterate(
206 localRange, [&](
const id<Dimensions> &LocalID) {
207 item<Dimensions, false> GlobalItem =
208 detail::Builder::createItem<Dimensions, false>(
209 globalRange, GroupStartID + LocalID);
210 item<Dimensions, false> LocalItem =
211 detail::Builder::createItem<Dimensions, false>(localRange,
213 h_item<Dimensions> HItem =
214 detail::Builder::createHItem<Dimensions>(GlobalItem, LocalItem);
225 template <
typename WorkItemFunctionT>
226 void parallel_for_work_item(range<Dimensions> flexibleRange,
227 WorkItemFunctionT Func)
const {
229 #ifdef __SYCL_DEVICE_ONLY__
230 range<Dimensions> GlobalSize{
231 __spirv::initGlobalSize<Dimensions, range<Dimensions>>()};
232 range<Dimensions> LocalSize{
233 __spirv::initWorkgroupSize<Dimensions, range<Dimensions>>()};
234 id<Dimensions> GlobalId{
235 __spirv::initGlobalInvocationId<Dimensions, id<Dimensions>>()};
236 id<Dimensions> LocalId{
237 __spirv::initLocalInvocationId<Dimensions, id<Dimensions>>()};
239 item<Dimensions, false> GlobalItem =
240 detail::Builder::createItem<Dimensions, false>(GlobalSize, GlobalId);
241 item<Dimensions, false> LocalItem =
242 detail::Builder::createItem<Dimensions, false>(LocalSize, LocalId);
243 h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
244 GlobalItem, LocalItem, flexibleRange);
249 detail::NDLoop<Dimensions>::iterate(
250 LocalId, LocalSize, flexibleRange,
251 [&](
const id<Dimensions> &LogicalLocalID) {
252 HItem.setLogicalLocalID(LogicalLocalID);
256 id<Dimensions> GroupStartID = index * localRange;
258 detail::NDLoop<Dimensions>::iterate(
259 localRange, [&](
const id<Dimensions> &LocalID) {
260 item<Dimensions, false> GlobalItem =
261 detail::Builder::createItem<Dimensions, false>(
262 globalRange, GroupStartID + LocalID);
263 item<Dimensions, false> LocalItem =
264 detail::Builder::createItem<Dimensions, false>(localRange,
266 h_item<Dimensions> HItem = detail::Builder::createHItem<Dimensions>(
267 GlobalItem, LocalItem, flexibleRange);
269 detail::NDLoop<Dimensions>::iterate(
270 LocalID, localRange, flexibleRange,
271 [&](
const id<Dimensions> &LogicalLocalID) {
272 HItem.setLogicalLocalID(LogicalLocalID);
282 template <access::mode accessMode = access::mode::read_write>
288 accessSpace = access::fence_space::global_and_local)
const {
306 template <
typename dataT>
307 detail::enable_if_t<!detail::is_bool<dataT>::value, device_event>
308 async_work_group_copy(local_ptr<dataT> dest, global_ptr<dataT> src,
309 size_t numElements,
size_t srcStride)
const {
315 numElements, srcStride, 0);
316 return device_event(E);
324 template <
typename dataT>
325 detail::enable_if_t<!detail::is_bool<dataT>::value, device_event>
326 async_work_group_copy(global_ptr<dataT> dest, local_ptr<dataT> src,
327 size_t numElements,
size_t destStride)
const {
333 numElements, destStride, 0);
334 return device_event(E);
344 detail::enable_if_t<detail::is_scalar_bool<T>::value, device_event>
345 async_work_group_copy(multi_ptr<T, DestS, DestIsDecorated> Dest,
346 multi_ptr<T, SrcS, SrcIsDecorated> Src,
347 size_t NumElements,
size_t Stride)
const {
348 static_assert(
sizeof(
bool) ==
sizeof(uint8_t),
349 "Async copy to/from bool memory is not supported.");
350 auto DestP = multi_ptr<uint8_t, DestS, DestIsDecorated>(
351 reinterpret_cast<uint8_t *
>(Dest.get()));
352 auto SrcP = multi_ptr<uint8_t, SrcS, SrcIsDecorated>(
353 reinterpret_cast<uint8_t *
>(Src.get()));
354 return async_work_group_copy(DestP, SrcP, NumElements, Stride);
364 detail::enable_if_t<detail::is_vector_bool<T>::value, device_event>
365 async_work_group_copy(multi_ptr<T, DestS, DestIsDecorated> Dest,
366 multi_ptr<T, SrcS, SrcIsDecorated> Src,
367 size_t NumElements,
size_t Stride)
const {
368 static_assert(
sizeof(
bool) ==
sizeof(uint8_t),
369 "Async copy to/from bool memory is not supported.");
370 using VecT = detail::change_base_type_t<T, uint8_t>;
371 auto DestP = address_space_cast<DestS, DestIsDecorated>(
372 reinterpret_cast<VecT *
>(Dest.get()));
373 auto SrcP = address_space_cast<SrcS, SrcIsDecorated>(
374 reinterpret_cast<VecT *
>(Src.get()));
375 return async_work_group_copy(DestP, SrcP, NumElements, Stride);
383 template <
typename dataT>
384 device_event async_work_group_copy(local_ptr<dataT> dest,
385 global_ptr<dataT> src,
386 size_t numElements)
const {
387 return async_work_group_copy(dest, src, numElements, 1);
395 template <
typename dataT>
396 device_event async_work_group_copy(global_ptr<dataT> dest,
397 local_ptr<dataT> src,
398 size_t numElements)
const {
399 return async_work_group_copy(dest, src, numElements, 1);
402 template <
typename... eventTN>
void wait_for(eventTN... Events)
const {
403 waitForHelper(Events...);
406 bool operator==(
const group<Dimensions> &rhs)
const {
407 bool Result = (rhs.globalRange == globalRange) &&
408 (rhs.localRange == localRange) && (rhs.index == index);
410 "inconsistent group class fields");
414 bool operator!=(
const group<Dimensions> &rhs)
const {
415 return !((*this) == rhs);
419 range<Dimensions> globalRange;
420 range<Dimensions> localRange;
421 range<Dimensions> groupRange;
422 id<Dimensions> index;
424 template <
int dims = Dimensions>
426 get_local_linear_id_impl()
const {
427 id<Dimensions> localId = get_local_id();
431 template <
int dims = Dimensions>
433 get_local_linear_id_impl()
const {
434 id<Dimensions> localId = get_local_id();
435 return localId[0] * localRange[1] + localId[1];
438 template <
int dims = Dimensions>
440 get_local_linear_id_impl()
const {
441 id<Dimensions> localId = get_local_id();
442 return (localId[0] * localRange[1] * localRange[2]) +
443 (localId[1] * localRange[2]) + localId[2];
446 template <
int dims = Dimensions>
448 get_local_linear_range_impl()
const {
449 auto localRange = get_local_range();
450 return localRange[0];
453 template <
int dims = Dimensions>
455 get_local_linear_range_impl()
const {
456 auto localRange = get_local_range();
457 return localRange[0] * localRange[1];
460 template <
int dims = Dimensions>
462 get_local_linear_range_impl()
const {
463 auto localRange = get_local_range();
464 return localRange[0] * localRange[1] * localRange[2];
467 template <
int dims = Dimensions>
469 get_group_linear_range_impl()
const {
470 auto groupRange = get_group_range();
471 return groupRange[0];
474 template <
int dims = Dimensions>
476 get_group_linear_range_impl()
const {
477 auto groupRange = get_group_range();
478 return groupRange[0] * groupRange[1];
481 template <
int dims = Dimensions>
483 get_group_linear_range_impl()
const {
484 auto groupRange = get_group_range();
485 return groupRange[0] * groupRange[1] * groupRange[2];
488 template <
int dims = Dimensions>
490 get_group_linear_id_impl()
const {
494 template <
int dims = Dimensions>
496 get_group_linear_id_impl()
const {
497 return index[0] * groupRange[1] + index[1];
510 template <
int dims = Dimensions>
512 get_group_linear_id_impl()
const {
513 return (index[0] * groupRange[1] * groupRange[2]) +
514 (index[1] * groupRange[2]) + index[2];
517 void waitForHelper()
const {}
519 void waitForHelper(device_event Event)
const { Event.wait(); }
521 template <
typename T,
typename... Ts>
522 void waitForHelper(T E, Ts... Es)
const {
524 waitForHelper(Es...);
528 friend class detail::Builder;
529 group(
const range<Dimensions> &G,
const range<Dimensions> &L,
530 const range<Dimensions> GroupRange,
const id<Dimensions> &I)
531 : globalRange(
G), localRange(L), groupRange(GroupRange), index(I) {
534 "global range is not multiple of local");
536 "inconsistent group constructor arguments");
543 #ifdef __SYCL_DEVICE_ONLY__
546 throw sycl::exception(
548 "Free function calls are not supported on host device");
552 namespace ext::oneapi::experimental {
554 #ifdef __SYCL_DEVICE_ONLY__
555 return sycl::detail::Builder::getElement(
558 throw sycl::exception(
560 "Free function calls are not supported on host device");
#define __SYCL_INLINE_VER_NAMESPACE(X)
#define __SYCL_DEPRECATED(message)
#define __SYCL2020_DEPRECATED(message)
conditional_t< TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetVectorT< SelectMatchingOpenCLType_t< T > >::type, conditional_t< TryToGetPointerT< SelectMatchingOpenCLType_t< T > >::value, typename TryToGetPointerVecT< SelectMatchingOpenCLType_t< T > >::type, SelectMatchingOpenCLType_t< T > >> ConvertToOpenCLType_t
constexpr __spv::MemorySemanticsMask::Flag getSPIRVMemorySemanticsMask(memory_order)
typename std::enable_if< B, T >::type enable_if_t
void write(GlobalBufAccessorT &GlobalFlushBuf, size_t FlushBufferSize, unsigned WIOffset, const char *Str, unsigned Len, unsigned Padding=0)
size_t get_local_linear_range(Group g)
static void workGroupBarrier()
Group::linear_id_type get_local_linear_id(Group g)
group< Dims > this_group()
constexpr std::enable_if_t< detail::IsCompileTimeProperty< PropertyT >::value, bool > operator!=(const property_value< PropertyT, A... > &, const property_value< PropertyT, B... > &)
T & operator[](std::ptrdiff_t idx) const noexcept
class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor class __SYCL_EBO __SYCL_SPECIAL_CLASS Dimensions
constexpr mode_tag_t< access_mode::read_write > read_write
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
---— Error handling, matching OpenCL plugin semantics.
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_ControlBarrier(__spv::Scope Execution, __spv::Scope Memory, uint32_t Semantics) noexcept
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyGlobalToLocal(__spv::Scope::Flag, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
__SYCL_CONVERGENT__ SYCL_EXTERNAL void __spirv_MemoryBarrier(__spv::Scope Memory, uint32_t Semantics) noexcept
__SYCL_CONVERGENT__ __ocl_event_t __SYCL_OpGroupAsyncCopyLocalToGlobal(__spv::Scope::Flag, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, __ocl_event_t) noexcept
bool operator==(const Slab &Lhs, const Slab &Rhs)