DPC++ Runtime
Runtime libraries for oneAPI DPC++
cg_types.hpp
Go to the documentation of this file.
1 //==---- cg_types.hpp - Auxiliary types required by command group class ----==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
11 #include <sycl/detail/array.hpp> // for array
12 #include <sycl/detail/common.hpp> // for InitializedVal, NDLoop
13 #include <sycl/detail/helpers.hpp> // for Builder
14 #include <sycl/detail/host_profiling_info.hpp> // for HostProfilingInfo
15 #include <sycl/detail/item_base.hpp> // for id
16 #include <sycl/detail/kernel_desc.hpp> // for kernel_param_kind_t
17 #include <sycl/detail/pi.h> // for PI_ERROR_INVALID_WORK...
18 #include <sycl/exception.hpp> // for nd_range_error
19 #include <sycl/group.hpp> // for group
20 #include <sycl/h_item.hpp> // for h_item
21 #include <sycl/id.hpp> // for id
22 #include <sycl/interop_handle.hpp> // for interop_handle
23 #include <sycl/item.hpp> // for item
24 #include <sycl/kernel_handler.hpp> // for kernel_handler
25 #include <sycl/nd_item.hpp> // for nd_item
26 #include <sycl/nd_range.hpp> // for nd_range
27 #include <sycl/range.hpp> // for range, operator*
28 
29 #include <functional> // for function
30 #include <stddef.h> // for size_t
31 #include <type_traits> // for enable_if_t, false_type
32 #include <utility> // for declval
33 
34 namespace sycl {
35 inline namespace _V1 {
36 namespace detail {
37 
38 // The structure represents kernel argument.
39 class ArgDesc {
40 public:
41  ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size,
42  int Index)
43  : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {}
44 
46  void *MPtr;
47  int MSize;
48  int MIndex;
49 };
50 
51 // The structure represents NDRange - global, local sizes, global offset and
52 // number of dimensions.
53 class NDRDescT {
54  // The method initializes all sizes for dimensions greater than the passed one
55  // to the default values, so they will not affect execution.
56  void setNDRangeLeftover(int Dims_) {
57  for (int I = Dims_; I < 3; ++I) {
58  GlobalSize[I] = 1;
59  LocalSize[I] = LocalSize[0] ? 1 : 0;
60  GlobalOffset[I] = 0;
61  NumWorkGroups[I] = 0;
62  }
63  }
64 
65 public:
67  : GlobalSize{0, 0, 0}, LocalSize{0, 0, 0}, NumWorkGroups{0, 0, 0},
68  Dims{0} {}
69 
70  template <int Dims_> void set(sycl::range<Dims_> NumWorkItems) {
71  for (int I = 0; I < Dims_; ++I) {
72  GlobalSize[I] = NumWorkItems[I];
73  LocalSize[I] = 0;
74  GlobalOffset[I] = 0;
75  NumWorkGroups[I] = 0;
76  }
77  setNDRangeLeftover(Dims_);
78  Dims = Dims_;
79  }
80 
81  // Initializes this ND range descriptor with given range of work items and
82  // offset.
83  template <int Dims_>
84  void set(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset) {
85  for (int I = 0; I < Dims_; ++I) {
86  GlobalSize[I] = NumWorkItems[I];
87  LocalSize[I] = 0;
88  GlobalOffset[I] = Offset[I];
89  NumWorkGroups[I] = 0;
90  }
91  setNDRangeLeftover(Dims_);
92  Dims = Dims_;
93  }
94 
95  template <int Dims_> void set(sycl::nd_range<Dims_> ExecutionRange) {
96  for (int I = 0; I < Dims_; ++I) {
97  GlobalSize[I] = ExecutionRange.get_global_range()[I];
98  LocalSize[I] = ExecutionRange.get_local_range()[I];
99  GlobalOffset[I] = ExecutionRange.get_offset()[I];
100  NumWorkGroups[I] = 0;
101  }
102  setNDRangeLeftover(Dims_);
103  Dims = Dims_;
104  }
105 
106  void set(int Dims_, sycl::nd_range<3> ExecutionRange) {
107  for (int I = 0; I < Dims_; ++I) {
108  GlobalSize[I] = ExecutionRange.get_global_range()[I];
109  LocalSize[I] = ExecutionRange.get_local_range()[I];
110  GlobalOffset[I] = ExecutionRange.get_offset()[I];
111  NumWorkGroups[I] = 0;
112  }
113  setNDRangeLeftover(Dims_);
114  Dims = Dims_;
115  }
116 
117  template <int Dims_> void setNumWorkGroups(sycl::range<Dims_> N) {
118  for (int I = 0; I < Dims_; ++I) {
119  GlobalSize[I] = 0;
120  // '0' is a mark to adjust before kernel launch when there is enough info:
121  LocalSize[I] = 0;
122  GlobalOffset[I] = 0;
123  NumWorkGroups[I] = N[I];
124  }
125  setNDRangeLeftover(Dims_);
126  Dims = Dims_;
127  }
128 
136  size_t Dims;
137 };
138 
139 template <typename, typename T> struct check_fn_signature {
140  static_assert(std::integral_constant<T, false>::value,
141  "Second template parameter is required to be of function type");
142 };
143 
144 template <typename F, typename RetT, typename... Args>
145 struct check_fn_signature<F, RetT(Args...)> {
146 private:
147  template <typename T>
148  static constexpr auto check(T *) -> typename std::is_same<
149  decltype(std::declval<T>().operator()(std::declval<Args>()...)),
150  RetT>::type;
151 
152  template <typename> static constexpr std::false_type check(...);
153 
154  using type = decltype(check<F>(0));
155 
156 public:
157  static constexpr bool value = type::value;
158 };
159 
160 template <typename F, typename... Args>
161 static constexpr bool check_kernel_lambda_takes_args() {
162  return check_fn_signature<std::remove_reference_t<F>, void(Args...)>::value;
163 }
164 
165 // isKernelLambdaCallableWithKernelHandlerImpl checks if LambdaArgType is void
166 // (e.g., in single_task), and based on that, calls
167 // check_kernel_lambda_takes_args with proper set of arguments. Also this type
168 // trait workarounds compilation error which happens only with msvc.
169 
170 template <
171  typename KernelType, typename LambdaArgType,
172  typename std::enable_if_t<std::is_same_v<LambdaArgType, void>> * = nullptr>
174  return check_kernel_lambda_takes_args<KernelType, kernel_handler>();
175 }
176 
177 template <
178  typename KernelType, typename LambdaArgType,
179  typename std::enable_if_t<!std::is_same_v<LambdaArgType, void>> * = nullptr>
181  return check_kernel_lambda_takes_args<KernelType, LambdaArgType,
182  kernel_handler>();
183 }
184 
185 // Type trait to find out if kernal lambda has kernel_handler argument
186 template <typename KernelType, typename LambdaArgType = void>
188  constexpr static bool value =
189  isKernelLambdaCallableWithKernelHandlerImpl<KernelType, LambdaArgType>();
190 };
191 
192 // Helpers for running kernel lambda on the host device
193 
194 template <typename KernelType>
195 typename std::enable_if_t<KernelLambdaHasKernelHandlerArgT<KernelType>::value>
196 runKernelWithoutArg(KernelType KernelName) {
197  kernel_handler KH;
198  KernelName(KH);
199 }
200 
201 template <typename KernelType>
202 typename std::enable_if_t<!KernelLambdaHasKernelHandlerArgT<KernelType>::value>
203 runKernelWithoutArg(KernelType KernelName) {
204  KernelName();
205 }
206 
207 template <typename ArgType, typename KernelType>
208 typename std::enable_if_t<
210 runKernelWithArg(KernelType KernelName, ArgType Arg) {
211  kernel_handler KH;
212  KernelName(Arg, KH);
213 }
214 
215 template <typename ArgType, typename KernelType>
216 typename std::enable_if_t<
218 runKernelWithArg(KernelType KernelName, ArgType Arg) {
219  KernelName(Arg);
220 }
221 
222 // The pure virtual class aimed to store lambda/functors of any type.
224 public:
225  // The method executes lambda stored using NDRange passed.
226  virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0;
227  // Return pointer to the lambda object.
228  // Used to extract captured variables.
229  virtual char *getPtr() = 0;
230  virtual ~HostKernelBase() = default;
231 };
232 
233 class HostTask {
234  std::function<void()> MHostTask;
235  std::function<void(interop_handle)> MInteropTask;
236 
237 public:
238  HostTask() : MHostTask([]() {}) {}
239  HostTask(std::function<void()> &&Func) : MHostTask(Func) {}
240  HostTask(std::function<void(interop_handle)> &&Func) : MInteropTask(Func) {}
241 
242  bool isInteropTask() const { return !!MInteropTask; }
243 
244  void call(HostProfilingInfo *HPI) {
245  if (HPI)
246  HPI->start();
247  MHostTask();
248  if (HPI)
249  HPI->end();
250  }
251 
252  void call(HostProfilingInfo *HPI, interop_handle handle) {
253  if (HPI)
254  HPI->start();
255  MInteropTask(handle);
256  if (HPI)
257  HPI->end();
258  }
259 };
260 
261 // Class which stores specific lambda object.
262 template <class KernelType, class KernelArgType, int Dims>
263 class HostKernel : public HostKernelBase {
264  using IDBuilder = sycl::detail::Builder;
265  KernelType MKernel;
266  // Allowing accessing MKernel from 'ResetHostKernelHelper' method of
267  // 'sycl::handler'
268  friend class sycl::handler;
269 
270 public:
271  HostKernel(KernelType Kernel) : MKernel(Kernel) {}
272  void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override {
273  // adjust ND range for serial host:
274  NDRDescT AdjustedRange = NDRDesc;
275 
276  if (NDRDesc.GlobalSize[0] == 0 && NDRDesc.NumWorkGroups[0] != 0) {
277  // This is a special case - NDRange information is not complete, only the
278  // desired number of work groups is set by the user. Choose work group
279  // size (LocalSize), calculate the missing NDRange characteristics
280  // needed to invoke the kernel and adjust the NDRange descriptor
281  // accordingly. For some devices the work group size selection requires
282  // access to the device's properties, hence such late "adjustment".
283  range<3> WGsize{1, 1, 1}; // no better alternative for serial host?
284  AdjustedRange.set(NDRDesc.Dims,
285  nd_range<3>(NDRDesc.NumWorkGroups * WGsize, WGsize));
286  }
287  // If local size for host is not set explicitly, let's adjust it to 1,
288  // so nd_range_error for zero local size is not thrown.
289  if (AdjustedRange.LocalSize[0] == 0)
290  for (size_t I = 0; I < AdjustedRange.Dims; ++I)
291  AdjustedRange.LocalSize[I] = 1;
292  if (HPI)
293  HPI->start();
294  runOnHost(AdjustedRange);
295  if (HPI)
296  HPI->end();
297  }
298 
299  char *getPtr() override { return reinterpret_cast<char *>(&MKernel); }
300 
301  template <class ArgT = KernelArgType>
302  typename std::enable_if_t<std::is_same_v<ArgT, void>>
303  runOnHost(const NDRDescT &) {
304  runKernelWithoutArg(MKernel);
305  }
306 
307  template <class ArgT = KernelArgType>
308  typename std::enable_if_t<std::is_same_v<ArgT, sycl::id<Dims>>>
309  runOnHost(const NDRDescT &NDRDesc) {
311  sycl::id<Dims> Offset;
312  sycl::range<Dims> Stride(
313  InitializedVal<Dims, range>::template get<1>()); // initialized to 1
314  sycl::range<Dims> UpperBound(
316  for (int I = 0; I < Dims; ++I) {
317  Range[I] = NDRDesc.GlobalSize[I];
318  Offset[I] = NDRDesc.GlobalOffset[I];
319  UpperBound[I] = Range[I] + Offset[I];
320  }
321 
323  /*LowerBound=*/Offset, Stride, UpperBound,
324  [&](const sycl::id<Dims> &ID) {
325  sycl::item<Dims, /*Offset=*/true> Item =
326  IDBuilder::createItem<Dims, true>(Range, ID, Offset);
327 
328  runKernelWithArg<const sycl::id<Dims> &>(MKernel, ID);
329  });
330  }
331 
332  template <class ArgT = KernelArgType>
333  typename std::enable_if_t<std::is_same_v<ArgT, item<Dims, /*Offset=*/false>>>
334  runOnHost(const NDRDescT &NDRDesc) {
335  sycl::id<Dims> ID;
337  for (int I = 0; I < Dims; ++I)
338  Range[I] = NDRDesc.GlobalSize[I];
339 
340  detail::NDLoop<Dims>::iterate(Range, [&](const sycl::id<Dims> ID) {
341  sycl::item<Dims, /*Offset=*/false> Item =
342  IDBuilder::createItem<Dims, false>(Range, ID);
343  sycl::item<Dims, /*Offset=*/true> ItemWithOffset = Item;
344 
345  runKernelWithArg<sycl::item<Dims, /*Offset=*/false>>(MKernel, Item);
346  });
347  }
348 
349  template <class ArgT = KernelArgType>
350  typename std::enable_if_t<std::is_same_v<ArgT, item<Dims, /*Offset=*/true>>>
351  runOnHost(const NDRDescT &NDRDesc) {
353  sycl::id<Dims> Offset;
354  sycl::range<Dims> Stride(
355  InitializedVal<Dims, range>::template get<1>()); // initialized to 1
356  sycl::range<Dims> UpperBound(
358  for (int I = 0; I < Dims; ++I) {
359  Range[I] = NDRDesc.GlobalSize[I];
360  Offset[I] = NDRDesc.GlobalOffset[I];
361  UpperBound[I] = Range[I] + Offset[I];
362  }
363 
365  /*LowerBound=*/Offset, Stride, UpperBound,
366  [&](const sycl::id<Dims> &ID) {
367  sycl::item<Dims, /*Offset=*/true> Item =
368  IDBuilder::createItem<Dims, true>(Range, ID, Offset);
369 
370  runKernelWithArg<sycl::item<Dims, /*Offset=*/true>>(MKernel, Item);
371  });
372  }
373 
374  template <class ArgT = KernelArgType>
375  typename std::enable_if_t<std::is_same_v<ArgT, nd_item<Dims>>>
376  runOnHost(const NDRDescT &NDRDesc) {
378  for (int I = 0; I < Dims; ++I) {
379  if (NDRDesc.LocalSize[I] == 0 ||
380  NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0)
381  throw sycl::nd_range_error("Invalid local size for global size",
382  PI_ERROR_INVALID_WORK_GROUP_SIZE);
383  GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
384  }
385 
387  sycl::range<Dims> GlobalSize(
389  sycl::id<Dims> GlobalOffset;
390  for (int I = 0; I < Dims; ++I) {
391  GlobalOffset[I] = NDRDesc.GlobalOffset[I];
392  LocalSize[I] = NDRDesc.LocalSize[I];
393  GlobalSize[I] = NDRDesc.GlobalSize[I];
394  }
395 
396  detail::NDLoop<Dims>::iterate(GroupSize, [&](const id<Dims> &GroupID) {
397  sycl::group<Dims> Group = IDBuilder::createGroup<Dims>(
398  GlobalSize, LocalSize, GroupSize, GroupID);
399 
400  detail::NDLoop<Dims>::iterate(LocalSize, [&](const id<Dims> &LocalID) {
401  id<Dims> GlobalID =
402  GroupID * id<Dims>{LocalSize} + LocalID + GlobalOffset;
403  const sycl::item<Dims, /*Offset=*/true> GlobalItem =
404  IDBuilder::createItem<Dims, true>(GlobalSize, GlobalID,
405  GlobalOffset);
406  const sycl::item<Dims, /*Offset=*/false> LocalItem =
407  IDBuilder::createItem<Dims, false>(LocalSize, LocalID);
408  const sycl::nd_item<Dims> NDItem =
409  IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
410 
411  runKernelWithArg<const sycl::nd_item<Dims>>(MKernel, NDItem);
412  });
413  });
414  }
415 
416  template <typename ArgT = KernelArgType>
417  std::enable_if_t<std::is_same_v<ArgT, sycl::group<Dims>>>
418  runOnHost(const NDRDescT &NDRDesc) {
420 
421  for (int I = 0; I < Dims; ++I) {
422  if (NDRDesc.LocalSize[I] == 0 ||
423  NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0)
424  throw sycl::nd_range_error("Invalid local size for global size",
425  PI_ERROR_INVALID_WORK_GROUP_SIZE);
426  NGroups[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
427  }
428 
430  sycl::range<Dims> GlobalSize(
432  for (int I = 0; I < Dims; ++I) {
433  LocalSize[I] = NDRDesc.LocalSize[I];
434  GlobalSize[I] = NDRDesc.GlobalSize[I];
435  }
436  detail::NDLoop<Dims>::iterate(NGroups, [&](const id<Dims> &GroupID) {
437  sycl::group<Dims> Group =
438  IDBuilder::createGroup<Dims>(GlobalSize, LocalSize, NGroups, GroupID);
439  runKernelWithArg<sycl::group<Dims>>(MKernel, Group);
440  });
441  }
442 
443  ~HostKernel() = default;
444 };
445 
446 } // namespace detail
447 } // namespace _V1
448 } // namespace sycl
ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, int Index)
Definition: cg_types.hpp:41
sycl::detail::kernel_param_kind_t MType
Definition: cg_types.hpp:45
virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI)=0
std::enable_if_t< std::is_same_v< ArgT, nd_item< Dims > > > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:376
std::enable_if_t< std::is_same_v< ArgT, void > > runOnHost(const NDRDescT &)
Definition: cg_types.hpp:303
std::enable_if_t< std::is_same_v< ArgT, sycl::group< Dims > > > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:418
HostKernel(KernelType Kernel)
Definition: cg_types.hpp:271
std::enable_if_t< std::is_same_v< ArgT, item< Dims, false > > > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:334
std::enable_if_t< std::is_same_v< ArgT, item< Dims, true > > > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:351
char * getPtr() override
Definition: cg_types.hpp:299
std::enable_if_t< std::is_same_v< ArgT, sycl::id< Dims > > > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:309
void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override
Definition: cg_types.hpp:272
Profiling info for the host execution.
void end()
Measures event's end time.
Definition: event_impl.cpp:481
void start()
Measures event's start time.
Definition: event_impl.cpp:479
void call(HostProfilingInfo *HPI, interop_handle handle)
Definition: cg_types.hpp:252
HostTask(std::function< void()> &&Func)
Definition: cg_types.hpp:239
HostTask(std::function< void(interop_handle)> &&Func)
Definition: cg_types.hpp:240
void call(HostProfilingInfo *HPI)
Definition: cg_types.hpp:244
sycl::range< 3 > GlobalSize
Definition: cg_types.hpp:129
sycl::range< 3 > NumWorkGroups
Number of workgroups, used to record the number of workgroups from the simplest form of parallel_for_...
Definition: cg_types.hpp:135
void setNumWorkGroups(sycl::range< Dims_ > N)
Definition: cg_types.hpp:117
void set(int Dims_, sycl::nd_range< 3 > ExecutionRange)
Definition: cg_types.hpp:106
void set(sycl::range< Dims_ > NumWorkItems, sycl::id< Dims_ > Offset)
Definition: cg_types.hpp:84
sycl::id< 3 > GlobalOffset
Definition: cg_types.hpp:131
sycl::range< 3 > LocalSize
Definition: cg_types.hpp:130
void set(sycl::nd_range< Dims_ > ExecutionRange)
Definition: cg_types.hpp:95
void set(sycl::range< Dims_ > NumWorkItems)
Definition: cg_types.hpp:70
Command group handler class.
Definition: handler.hpp:458
A unique identifier of an item in an index space.
Definition: id.hpp:36
Identifies an instance of the function object executing at each point in a range.
Definition: item.hpp:37
Identifies an instance of the function object executing at each point in an nd_range.
Definition: nd_item.hpp:48
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:22
range< Dimensions > get_global_range() const
Definition: nd_range.hpp:43
range< Dimensions > get_local_range() const
Definition: nd_range.hpp:45
id< Dimensions > get_offset() const
Definition: nd_range.hpp:50
Defines the iteration domain of either a single work-group in a parallel dispatch,...
Definition: range.hpp:26
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType, ArgType >::value > runKernelWithArg(KernelType KernelName, ArgType Arg)
Definition: cg_types.hpp:210
std::enable_if_t< KernelLambdaHasKernelHandlerArgT< KernelType >::value > runKernelWithoutArg(KernelType KernelName)
Definition: cg_types.hpp:196
constexpr bool isKernelLambdaCallableWithKernelHandlerImpl()
Definition: cg_types.hpp:173
static constexpr bool check_kernel_lambda_takes_args()
Definition: cg_types.hpp:161
constexpr if(sizeof(T)==8)
Definition: access.hpp:18
Generates an NDims-dimensional perfect loop nest.
Definition: common.hpp:319
static __SYCL_ALWAYS_INLINE void iterate(const LoopBoundTy< NDims > &UpperBound, FuncTy f)
Generates ND loop nest with {0,..0} .
Definition: common.hpp:325