31 #include <type_traits>
35 inline namespace _V1 {
56 void setNDRangeLeftover(
int Dims_) {
57 for (
int I = Dims_; I < 3; ++I) {
71 for (
int I = 0; I < Dims_; ++I) {
77 setNDRangeLeftover(Dims_);
85 for (
int I = 0; I < Dims_; ++I) {
91 setNDRangeLeftover(Dims_);
96 for (
int I = 0; I < Dims_; ++I) {
102 setNDRangeLeftover(Dims_);
107 for (
int I = 0; I < Dims_; ++I) {
113 setNDRangeLeftover(Dims_);
118 for (
int I = 0; I < Dims_; ++I) {
125 setNDRangeLeftover(Dims_);
140 static_assert(std::integral_constant<T, false>::value,
141 "Second template parameter is required to be of function type");
144 template <
typename F,
typename RetT,
typename... Args>
147 template <
typename T>
148 static constexpr
auto check(T *) ->
typename std::is_same<
149 decltype(std::declval<T>().
operator()(std::declval<Args>()...)),
152 template <
typename>
static constexpr std::false_type check(...);
154 using type = decltype(check<F>(0));
157 static constexpr
bool value = type::value;
160 template <
typename F,
typename... Args>
171 typename KernelType,
typename LambdaArgType,
172 typename std::enable_if_t<std::is_same_v<LambdaArgType, void>> * =
nullptr>
174 return check_kernel_lambda_takes_args<KernelType, kernel_handler>();
178 typename KernelType,
typename LambdaArgType,
179 typename std::enable_if_t<!std::is_same_v<LambdaArgType, void>> * =
nullptr>
186 template <
typename KernelType,
typename LambdaArgType =
void>
189 isKernelLambdaCallableWithKernelHandlerImpl<KernelType, LambdaArgType>();
194 template <
typename KernelType>
195 typename std::enable_if_t<KernelLambdaHasKernelHandlerArgT<KernelType>::value>
201 template <
typename KernelType>
202 typename std::enable_if_t<!KernelLambdaHasKernelHandlerArgT<KernelType>::value>
207 template <
typename ArgType,
typename KernelType>
208 typename std::enable_if_t<
215 template <
typename ArgType,
typename KernelType>
216 typename std::enable_if_t<
234 std::function<void()> MHostTask;
239 HostTask(std::function<
void()> &&Func) : MHostTask(Func) {}
255 MInteropTask(handle);
262 template <
class KernelType,
class KernelArgType,
int Dims>
264 using IDBuilder = sycl::detail::Builder;
284 AdjustedRange.
set(NDRDesc.
Dims,
290 for (
size_t I = 0; I < AdjustedRange.
Dims; ++I)
299 char *
getPtr()
override {
return reinterpret_cast<char *
>(&MKernel); }
301 template <
class ArgT = KernelArgType>
302 typename std::enable_if_t<std::is_same_v<ArgT, void>>
307 template <
class ArgT = KernelArgType>
308 typename std::enable_if_t<std::is_same_v<ArgT, sycl::id<Dims>>>
316 for (
int I = 0; I < Dims; ++I) {
319 UpperBound[I] = Range[I] + Offset[I];
323 Offset, Stride, UpperBound,
326 IDBuilder::createItem<Dims, true>(Range, ID, Offset);
328 runKernelWithArg<const sycl::id<Dims> &>(MKernel, ID);
332 template <
class ArgT = KernelArgType>
333 typename std::enable_if_t<std::is_same_v<ArgT,
item<Dims,
false>>>
337 for (
int I = 0; I < Dims; ++I)
341 sycl::item<Dims, false> Item =
342 IDBuilder::createItem<Dims, false>(Range, ID);
343 sycl::item<Dims, true> ItemWithOffset = Item;
345 runKernelWithArg<sycl::item<Dims, false>>(MKernel, Item);
349 template <
class ArgT = KernelArgType>
350 typename std::enable_if_t<std::is_same_v<ArgT,
item<Dims,
true>>>
358 for (
int I = 0; I < Dims; ++I) {
361 UpperBound[I] = Range[I] + Offset[I];
365 Offset, Stride, UpperBound,
368 IDBuilder::createItem<Dims, true>(Range, ID, Offset);
374 template <
class ArgT = KernelArgType>
375 typename std::enable_if_t<std::is_same_v<ArgT, nd_item<Dims>>>
378 for (
int I = 0; I < Dims; ++I) {
381 throw sycl::nd_range_error(
"Invalid local size for global size",
382 PI_ERROR_INVALID_WORK_GROUP_SIZE);
390 for (
int I = 0; I < Dims; ++I) {
398 GlobalSize, LocalSize, GroupSize, GroupID);
402 GroupID *
id<Dims>{LocalSize} + LocalID + GlobalOffset;
404 IDBuilder::createItem<Dims, true>(GlobalSize, GlobalID,
407 IDBuilder::createItem<Dims, false>(LocalSize, LocalID);
409 IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
411 runKernelWithArg<const sycl::nd_item<Dims>>(MKernel, NDItem);
416 template <
typename ArgT = KernelArgType>
417 std::enable_if_t<std::is_same_v<ArgT, sycl::group<Dims>>>
421 for (
int I = 0; I < Dims; ++I) {
424 throw sycl::nd_range_error(
"Invalid local size for global size",
425 PI_ERROR_INVALID_WORK_GROUP_SIZE);
432 for (
int I = 0; I < Dims; ++I) {
438 IDBuilder::createGroup<Dims>(GlobalSize, LocalSize, NGroups, GroupID);
439 runKernelWithArg<sycl::group<Dims>>(MKernel, Group);
ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, int Index)
sycl::detail::kernel_param_kind_t MType
virtual char * getPtr()=0
virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI)=0
virtual ~HostKernelBase()=default
std::enable_if_t< std::is_same_v< ArgT, nd_item< Dims > > > runOnHost(const NDRDescT &NDRDesc)
std::enable_if_t< std::is_same_v< ArgT, void > > runOnHost(const NDRDescT &)
std::enable_if_t< std::is_same_v< ArgT, sycl::group< Dims > > > runOnHost(const NDRDescT &NDRDesc)
HostKernel(KernelType Kernel)
std::enable_if_t< std::is_same_v< ArgT, item< Dims, false > > > runOnHost(const NDRDescT &NDRDesc)
std::enable_if_t< std::is_same_v< ArgT, item< Dims, true > > > runOnHost(const NDRDescT &NDRDesc)
std::enable_if_t< std::is_same_v< ArgT, sycl::id< Dims > > > runOnHost(const NDRDescT &NDRDesc)
void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override
Profiling info for the host execution.
void end()
Measures event's end time.
void start()
Measures event's start time.
void call(HostProfilingInfo *HPI, interop_handle handle)
bool isInteropTask() const
HostTask(std::function< void()> &&Func)
HostTask(std::function< void(interop_handle)> &&Func)
void call(HostProfilingInfo *HPI)
sycl::range< 3 > GlobalSize
sycl::range< 3 > NumWorkGroups
Number of workgroups, used to record the number of workgroups from the simplest form of parallel_for_...
void setNumWorkGroups(sycl::range< Dims_ > N)
void set(int Dims_, sycl::nd_range< 3 > ExecutionRange)
void set(sycl::range< Dims_ > NumWorkItems, sycl::id< Dims_ > Offset)
sycl::id< 3 > GlobalOffset
sycl::range< 3 > LocalSize
void set(sycl::nd_range< Dims_ > ExecutionRange)
void set(sycl::range< Dims_ > NumWorkItems)
Command group handler class.
A unique identifier of an item in an index space.
Identifies an instance of the function object executing at each point in a range.
Identifies an instance of the function object executing at each point in an nd_range.
Defines the iteration domain of both the work-groups and the overall dispatch.
range< Dimensions > get_global_range() const
range< Dimensions > get_local_range() const
id< Dimensions > get_offset() const
Defines the iteration domain of either a single work-group in a parallel dispatch,...
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType, ArgType >::value > runKernelWithArg(KernelType KernelName, ArgType Arg)
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType >::value > runKernelWithoutArg(KernelType KernelName)
constexpr bool isKernelLambdaCallableWithKernelHandlerImpl()
static constexpr bool check_kernel_lambda_takes_args()
constexpr if(sizeof(T)==8)
constexpr static bool value
Generates an NDims-dimensional perfect loop nest.
static __SYCL_ALWAYS_INLINE void iterate(const LoopBoundTy< NDims > &UpperBound, FuncTy f)
Generates ND loop nest with {0,..0} .