31 : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {}
44 void setNDRangeLeftover(
int Dims_) {
45 for (
int I = Dims_; I < 3; ++I) {
47 LocalSize[I] = LocalSize[0] ? 1 : 0;
55 : GlobalSize{0, 0, 0}, LocalSize{0, 0, 0}, NumWorkGroups{0, 0, 0} {}
57 template <
int Dims_>
void set(sycl::range<Dims_> NumWorkItems) {
58 for (
int I = 0; I < Dims_; ++I) {
59 GlobalSize[I] = NumWorkItems[I];
64 setNDRangeLeftover(Dims_);
71 void set(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset) {
72 for (
int I = 0; I < Dims_; ++I) {
73 GlobalSize[I] = NumWorkItems[I];
75 GlobalOffset[I] = Offset[I];
78 setNDRangeLeftover(Dims_);
82 template <
int Dims_>
void set(sycl::nd_range<Dims_> ExecutionRange) {
83 for (
int I = 0; I < Dims_; ++I) {
84 GlobalSize[I] = ExecutionRange.get_global_range()[I];
85 LocalSize[I] = ExecutionRange.get_local_range()[I];
86 GlobalOffset[I] = ExecutionRange.get_offset()[I];
89 setNDRangeLeftover(Dims_);
93 void set(
int Dims_, sycl::nd_range<3> ExecutionRange) {
94 for (
int I = 0; I < Dims_; ++I) {
95 GlobalSize[I] = ExecutionRange.get_global_range()[I];
96 LocalSize[I] = ExecutionRange.get_local_range()[I];
97 GlobalOffset[I] = ExecutionRange.get_offset()[I];
100 setNDRangeLeftover(Dims_);
105 for (
int I = 0; I < Dims_; ++I) {
110 NumWorkGroups[I] = N[I];
112 setNDRangeLeftover(Dims_);
127 static_assert(std::integral_constant<T, false>::value,
128 "Second template parameter is required to be of function type");
131 template <
typename F,
typename RetT,
typename... Args>
134 template <
typename T>
135 static constexpr
auto check(T *) ->
typename std::is_same<
136 decltype(std::declval<T>().
operator()(std::declval<Args>()...)),
139 template <
typename>
static constexpr std::false_type check(...);
141 using type = decltype(check<F>(0));
144 static constexpr
bool value = type::value;
147 template <
typename F,
typename... Args>
157 template <
typename KernelType,
typename LambdaArgType,
158 typename std::enable_if_t<std::is_same<LambdaArgType, void>::value>
161 return check_kernel_lambda_takes_args<KernelType, kernel_handler>();
164 template <
typename KernelType,
typename LambdaArgType,
165 typename std::enable_if_t<!std::is_same<LambdaArgType, void>::value>
173 template <
typename KernelType,
typename LambdaArgType =
void>
175 constexpr
static bool value =
176 isKernelLambdaCallableWithKernelHandlerImpl<KernelType, LambdaArgType>();
181 template <
typename KernelType>
182 typename std::enable_if_t<KernelLambdaHasKernelHandlerArgT<KernelType>::value>
188 template <
typename KernelType>
189 typename std::enable_if_t<!KernelLambdaHasKernelHandlerArgT<KernelType>::value>
194 template <
typename ArgType,
typename KernelType>
196 KernelLambdaHasKernelHandlerArgT<KernelType, ArgType>::value>
202 template <
typename ArgType,
typename KernelType>
204 !KernelLambdaHasKernelHandlerArgT<KernelType, ArgType>::value>
216 virtual char *getPtr() = 0;
221 std::function<void(sycl::interop_handler)> MFunc;
224 InteropTask(std::function<
void(sycl::interop_handler)> Func) : MFunc(Func) {}
225 void call(sycl::interop_handler &h) { MFunc(h); }
229 std::function<void()> MHostTask;
234 HostTask(std::function<
void()> &&Func) : MHostTask(Func) {}
244 template <
class KernelType,
class KernelArgType,
int Dims>
246 using IDBuilder = sycl::detail::Builder;
250 friend class sycl::handler;
266 AdjustedRange.
set(NDRDesc.
Dims,
272 for (
size_t I = 0; I < AdjustedRange.
Dims; ++I)
276 runOnHost(AdjustedRange);
281 char *
getPtr()
override {
return reinterpret_cast<char *
>(&MKernel); }
283 template <
class ArgT = KernelArgType>
289 template <
class ArgT = KernelArgType>
293 sycl::id<Dims> Offset;
294 sycl::range<Dims> Stride(
296 sycl::range<Dims> UpperBound(
298 for (
int I = 0; I < Dims; ++I) {
301 UpperBound[I] = Range[I] + Offset[I];
305 Offset, Stride, UpperBound,
306 [&](
const sycl::id<Dims> &ID) {
307 sycl::item<Dims,
true> Item =
308 IDBuilder::createItem<Dims, true>(Range, ID, Offset);
310 runKernelWithArg<const sycl::id<Dims> &>(MKernel, ID);
314 template <
class ArgT = KernelArgType>
316 std::is_same<ArgT,
item<Dims,
false>>::value>
320 for (
int I = 0; I < Dims; ++I)
324 sycl::item<Dims, false> Item =
325 IDBuilder::createItem<Dims, false>(Range, ID);
326 sycl::item<Dims, true> ItemWithOffset = Item;
328 runKernelWithArg<sycl::item<Dims, false>>(MKernel, Item);
332 template <
class ArgT = KernelArgType>
334 std::is_same<ArgT,
item<Dims,
true>>::value>
337 sycl::id<Dims> Offset;
338 sycl::range<Dims> Stride(
340 sycl::range<Dims> UpperBound(
342 for (
int I = 0; I < Dims; ++I) {
345 UpperBound[I] = Range[I] + Offset[I];
349 Offset, Stride, UpperBound,
350 [&](
const sycl::id<Dims> &ID) {
351 sycl::item<Dims,
true> Item =
352 IDBuilder::createItem<Dims, true>(Range, ID, Offset);
358 template <
class ArgT = KernelArgType>
362 for (
int I = 0; I < Dims; ++I) {
365 throw sycl::nd_range_error(
"Invalid local size for global size",
366 PI_ERROR_INVALID_WORK_GROUP_SIZE);
371 sycl::range<Dims> GlobalSize(
373 sycl::id<Dims> GlobalOffset;
374 for (
int I = 0; I < Dims; ++I) {
381 sycl::group<Dims> Group = IDBuilder::createGroup<Dims>(
382 GlobalSize, LocalSize, GroupSize, GroupID);
385 id<Dims> GlobalID = GroupID * LocalSize + LocalID + GlobalOffset;
386 const sycl::item<Dims,
true> GlobalItem =
387 IDBuilder::createItem<Dims, true>(GlobalSize, GlobalID,
389 const sycl::item<Dims,
false> LocalItem =
390 IDBuilder::createItem<Dims, false>(LocalSize, LocalID);
391 const sycl::nd_item<Dims> NDItem =
392 IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
394 runKernelWithArg<const sycl::nd_item<Dims>>(MKernel, NDItem);
399 template <
typename ArgT = KernelArgType>
404 for (
int I = 0; I < Dims; ++I) {
407 throw sycl::nd_range_error(
"Invalid local size for global size",
408 PI_ERROR_INVALID_WORK_GROUP_SIZE);
413 sycl::range<Dims> GlobalSize(
415 for (
int I = 0; I < Dims; ++I) {
420 sycl::group<Dims> Group =
421 IDBuilder::createGroup<Dims>(GlobalSize, LocalSize, NGroups, GroupID);
422 runKernelWithArg<sycl::group<Dims>>(MKernel, Group);