DPC++ Runtime
Runtime libraries for oneAPI DPC++
cg_types.hpp
Go to the documentation of this file.
1 //==---- cg_types.hpp - Auxiliary types required by command group class ----==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #pragma once
10 
13 #include <sycl/group.hpp>
14 #include <sycl/id.hpp>
15 #include <sycl/interop_handle.hpp>
16 #include <sycl/interop_handler.hpp>
17 #include <sycl/kernel.hpp>
18 #include <sycl/kernel_handler.hpp>
19 #include <sycl/nd_item.hpp>
20 #include <sycl/range.hpp>
21 
22 namespace sycl {
24 namespace detail {
25 
26 // The structure represents kernel argument.
27 class ArgDesc {
28 public:
29  ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size,
30  int Index)
31  : MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {}
32 
34  void *MPtr;
35  int MSize;
36  int MIndex;
37 };
38 
39 // The structure represents NDRange - global, local sizes, global offset and
40 // number of dimensions.
41 class NDRDescT {
42  // The method initializes all sizes for dimensions greater than the passed one
43  // to the default values, so they will not affect execution.
44  void setNDRangeLeftover(int Dims_) {
45  for (int I = Dims_; I < 3; ++I) {
46  GlobalSize[I] = 1;
47  LocalSize[I] = LocalSize[0] ? 1 : 0;
48  GlobalOffset[I] = 0;
49  NumWorkGroups[I] = 0;
50  }
51  }
52 
53 public:
55  : GlobalSize{0, 0, 0}, LocalSize{0, 0, 0}, NumWorkGroups{0, 0, 0} {}
56 
57  template <int Dims_> void set(sycl::range<Dims_> NumWorkItems) {
58  for (int I = 0; I < Dims_; ++I) {
59  GlobalSize[I] = NumWorkItems[I];
60  LocalSize[I] = 0;
61  GlobalOffset[I] = 0;
62  NumWorkGroups[I] = 0;
63  }
64  setNDRangeLeftover(Dims_);
65  Dims = Dims_;
66  }
67 
68  // Initializes this ND range descriptor with given range of work items and
69  // offset.
70  template <int Dims_>
71  void set(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset) {
72  for (int I = 0; I < Dims_; ++I) {
73  GlobalSize[I] = NumWorkItems[I];
74  LocalSize[I] = 0;
75  GlobalOffset[I] = Offset[I];
76  NumWorkGroups[I] = 0;
77  }
78  setNDRangeLeftover(Dims_);
79  Dims = Dims_;
80  }
81 
82  template <int Dims_> void set(sycl::nd_range<Dims_> ExecutionRange) {
83  for (int I = 0; I < Dims_; ++I) {
84  GlobalSize[I] = ExecutionRange.get_global_range()[I];
85  LocalSize[I] = ExecutionRange.get_local_range()[I];
86  GlobalOffset[I] = ExecutionRange.get_offset()[I];
87  NumWorkGroups[I] = 0;
88  }
89  setNDRangeLeftover(Dims_);
90  Dims = Dims_;
91  }
92 
93  void set(int Dims_, sycl::nd_range<3> ExecutionRange) {
94  for (int I = 0; I < Dims_; ++I) {
95  GlobalSize[I] = ExecutionRange.get_global_range()[I];
96  LocalSize[I] = ExecutionRange.get_local_range()[I];
97  GlobalOffset[I] = ExecutionRange.get_offset()[I];
98  NumWorkGroups[I] = 0;
99  }
100  setNDRangeLeftover(Dims_);
101  Dims = Dims_;
102  }
103 
104  template <int Dims_> void setNumWorkGroups(sycl::range<Dims_> N) {
105  for (int I = 0; I < Dims_; ++I) {
106  GlobalSize[I] = 0;
107  // '0' is a mark to adjust before kernel launch when there is enough info:
108  LocalSize[I] = 0;
109  GlobalOffset[I] = 0;
110  NumWorkGroups[I] = N[I];
111  }
112  setNDRangeLeftover(Dims_);
113  Dims = Dims_;
114  }
115 
116  sycl::range<3> GlobalSize;
117  sycl::range<3> LocalSize;
118  sycl::id<3> GlobalOffset;
122  sycl::range<3> NumWorkGroups;
123  size_t Dims;
124 };
125 
126 template <typename, typename T> struct check_fn_signature {
127  static_assert(std::integral_constant<T, false>::value,
128  "Second template parameter is required to be of function type");
129 };
130 
131 template <typename F, typename RetT, typename... Args>
132 struct check_fn_signature<F, RetT(Args...)> {
133 private:
134  template <typename T>
135  static constexpr auto check(T *) -> typename std::is_same<
136  decltype(std::declval<T>().operator()(std::declval<Args>()...)),
137  RetT>::type;
138 
139  template <typename> static constexpr std::false_type check(...);
140 
141  using type = decltype(check<F>(0));
142 
143 public:
144  static constexpr bool value = type::value;
145 };
146 
147 template <typename F, typename... Args>
148 static constexpr bool check_kernel_lambda_takes_args() {
149  return check_fn_signature<std::remove_reference_t<F>, void(Args...)>::value;
150 }
151 
152 // isKernelLambdaCallableWithKernelHandlerImpl checks if LambdaArgType is void
153 // (e.g., in single_task), and based on that, calls
154 // check_kernel_lambda_takes_args with proper set of arguments. Also this type
155 // trait workarounds compilation error which happens only with msvc.
156 
157 template <typename KernelType, typename LambdaArgType,
158  typename std::enable_if_t<std::is_same<LambdaArgType, void>::value>
159  * = nullptr>
161  return check_kernel_lambda_takes_args<KernelType, kernel_handler>();
162 }
163 
164 template <typename KernelType, typename LambdaArgType,
165  typename std::enable_if_t<!std::is_same<LambdaArgType, void>::value>
166  * = nullptr>
168  return check_kernel_lambda_takes_args<KernelType, LambdaArgType,
169  kernel_handler>();
170 }
171 
172 // Type trait to find out if kernal lambda has kernel_handler argument
173 template <typename KernelType, typename LambdaArgType = void>
175  constexpr static bool value =
176  isKernelLambdaCallableWithKernelHandlerImpl<KernelType, LambdaArgType>();
177 };
178 
179 // Helpers for running kernel lambda on the host device
180 
181 template <typename KernelType>
182 typename std::enable_if_t<KernelLambdaHasKernelHandlerArgT<KernelType>::value>
183 runKernelWithoutArg(KernelType KernelName) {
184  kernel_handler KH;
185  KernelName(KH);
186 }
187 
188 template <typename KernelType>
189 typename std::enable_if_t<!KernelLambdaHasKernelHandlerArgT<KernelType>::value>
190 runKernelWithoutArg(KernelType KernelName) {
191  KernelName();
192 }
193 
194 template <typename ArgType, typename KernelType>
195 typename std::enable_if_t<
196  KernelLambdaHasKernelHandlerArgT<KernelType, ArgType>::value>
197 runKernelWithArg(KernelType KernelName, ArgType Arg) {
198  kernel_handler KH;
199  KernelName(Arg, KH);
200 }
201 
202 template <typename ArgType, typename KernelType>
203 typename std::enable_if_t<
204  !KernelLambdaHasKernelHandlerArgT<KernelType, ArgType>::value>
205 runKernelWithArg(KernelType KernelName, ArgType Arg) {
206  KernelName(Arg);
207 }
208 
209 // The pure virtual class aimed to store lambda/functors of any type.
211 public:
212  // The method executes lambda stored using NDRange passed.
213  virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0;
214  // Return pointer to the lambda object.
215  // Used to extract captured variables.
216  virtual char *getPtr() = 0;
217  virtual ~HostKernelBase() = default;
218 };
219 
220 class InteropTask {
221  std::function<void(sycl::interop_handler)> MFunc;
222 
223 public:
224  InteropTask(std::function<void(sycl::interop_handler)> Func) : MFunc(Func) {}
225  void call(sycl::interop_handler &h) { MFunc(h); }
226 };
227 
228 class HostTask {
229  std::function<void()> MHostTask;
230  std::function<void(interop_handle)> MInteropTask;
231 
232 public:
233  HostTask() : MHostTask([]() {}) {}
234  HostTask(std::function<void()> &&Func) : MHostTask(Func) {}
235  HostTask(std::function<void(interop_handle)> &&Func) : MInteropTask(Func) {}
236 
237  bool isInteropTask() const { return !!MInteropTask; }
238 
239  void call() { MHostTask(); }
240  void call(interop_handle handle) { MInteropTask(handle); }
241 };
242 
243 // Class which stores specific lambda object.
244 template <class KernelType, class KernelArgType, int Dims>
245 class HostKernel : public HostKernelBase {
246  using IDBuilder = sycl::detail::Builder;
247  KernelType MKernel;
248  // Allowing accessing MKernel from 'ResetHostKernelHelper' method of
249  // 'sycl::handler'
250  friend class sycl::handler;
251 
252 public:
253  HostKernel(KernelType Kernel) : MKernel(Kernel) {}
254  void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override {
255  // adjust ND range for serial host:
256  NDRDescT AdjustedRange = NDRDesc;
257 
258  if (NDRDesc.GlobalSize[0] == 0 && NDRDesc.NumWorkGroups[0] != 0) {
259  // This is a special case - NDRange information is not complete, only the
260  // desired number of work groups is set by the user. Choose work group
261  // size (LocalSize), calculate the missing NDRange characteristics
262  // needed to invoke the kernel and adjust the NDRange descriptor
263  // accordingly. For some devices the work group size selection requires
264  // access to the device's properties, hence such late "adjustment".
265  range<3> WGsize{1, 1, 1}; // no better alternative for serial host?
266  AdjustedRange.set(NDRDesc.Dims,
267  nd_range<3>(NDRDesc.NumWorkGroups * WGsize, WGsize));
268  }
269  // If local size for host is not set explicitly, let's adjust it to 1,
270  // so nd_range_error for zero local size is not thrown.
271  if (AdjustedRange.LocalSize[0] == 0)
272  for (size_t I = 0; I < AdjustedRange.Dims; ++I)
273  AdjustedRange.LocalSize[I] = 1;
274  if (HPI)
275  HPI->start();
276  runOnHost(AdjustedRange);
277  if (HPI)
278  HPI->end();
279  }
280 
281  char *getPtr() override { return reinterpret_cast<char *>(&MKernel); }
282 
283  template <class ArgT = KernelArgType>
285  runOnHost(const NDRDescT &) {
286  runKernelWithoutArg(MKernel);
287  }
288 
289  template <class ArgT = KernelArgType>
291  runOnHost(const NDRDescT &NDRDesc) {
292  sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
293  sycl::id<Dims> Offset;
294  sycl::range<Dims> Stride(
295  InitializedVal<Dims, range>::template get<1>()); // initialized to 1
296  sycl::range<Dims> UpperBound(
298  for (int I = 0; I < Dims; ++I) {
299  Range[I] = NDRDesc.GlobalSize[I];
300  Offset[I] = NDRDesc.GlobalOffset[I];
301  UpperBound[I] = Range[I] + Offset[I];
302  }
303 
305  /*LowerBound=*/Offset, Stride, UpperBound,
306  [&](const sycl::id<Dims> &ID) {
307  sycl::item<Dims, /*Offset=*/true> Item =
308  IDBuilder::createItem<Dims, true>(Range, ID, Offset);
309 
310  runKernelWithArg<const sycl::id<Dims> &>(MKernel, ID);
311  });
312  }
313 
314  template <class ArgT = KernelArgType>
315  typename detail::enable_if_t<
316  std::is_same<ArgT, item<Dims, /*Offset=*/false>>::value>
317  runOnHost(const NDRDescT &NDRDesc) {
318  sycl::id<Dims> ID;
319  sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
320  for (int I = 0; I < Dims; ++I)
321  Range[I] = NDRDesc.GlobalSize[I];
322 
323  detail::NDLoop<Dims>::iterate(Range, [&](const sycl::id<Dims> ID) {
324  sycl::item<Dims, /*Offset=*/false> Item =
325  IDBuilder::createItem<Dims, false>(Range, ID);
326  sycl::item<Dims, /*Offset=*/true> ItemWithOffset = Item;
327 
328  runKernelWithArg<sycl::item<Dims, /*Offset=*/false>>(MKernel, Item);
329  });
330  }
331 
332  template <class ArgT = KernelArgType>
333  typename detail::enable_if_t<
334  std::is_same<ArgT, item<Dims, /*Offset=*/true>>::value>
335  runOnHost(const NDRDescT &NDRDesc) {
336  sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
337  sycl::id<Dims> Offset;
338  sycl::range<Dims> Stride(
339  InitializedVal<Dims, range>::template get<1>()); // initialized to 1
340  sycl::range<Dims> UpperBound(
342  for (int I = 0; I < Dims; ++I) {
343  Range[I] = NDRDesc.GlobalSize[I];
344  Offset[I] = NDRDesc.GlobalOffset[I];
345  UpperBound[I] = Range[I] + Offset[I];
346  }
347 
349  /*LowerBound=*/Offset, Stride, UpperBound,
350  [&](const sycl::id<Dims> &ID) {
351  sycl::item<Dims, /*Offset=*/true> Item =
352  IDBuilder::createItem<Dims, true>(Range, ID, Offset);
353 
354  runKernelWithArg<sycl::item<Dims, /*Offset=*/true>>(MKernel, Item);
355  });
356  }
357 
358  template <class ArgT = KernelArgType>
360  runOnHost(const NDRDescT &NDRDesc) {
361  sycl::range<Dims> GroupSize(InitializedVal<Dims, range>::template get<0>());
362  for (int I = 0; I < Dims; ++I) {
363  if (NDRDesc.LocalSize[I] == 0 ||
364  NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0)
365  throw sycl::nd_range_error("Invalid local size for global size",
366  PI_ERROR_INVALID_WORK_GROUP_SIZE);
367  GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
368  }
369 
370  sycl::range<Dims> LocalSize(InitializedVal<Dims, range>::template get<0>());
371  sycl::range<Dims> GlobalSize(
373  sycl::id<Dims> GlobalOffset;
374  for (int I = 0; I < Dims; ++I) {
375  GlobalOffset[I] = NDRDesc.GlobalOffset[I];
376  LocalSize[I] = NDRDesc.LocalSize[I];
377  GlobalSize[I] = NDRDesc.GlobalSize[I];
378  }
379 
380  detail::NDLoop<Dims>::iterate(GroupSize, [&](const id<Dims> &GroupID) {
381  sycl::group<Dims> Group = IDBuilder::createGroup<Dims>(
382  GlobalSize, LocalSize, GroupSize, GroupID);
383 
384  detail::NDLoop<Dims>::iterate(LocalSize, [&](const id<Dims> &LocalID) {
385  id<Dims> GlobalID = GroupID * LocalSize + LocalID + GlobalOffset;
386  const sycl::item<Dims, /*Offset=*/true> GlobalItem =
387  IDBuilder::createItem<Dims, true>(GlobalSize, GlobalID,
388  GlobalOffset);
389  const sycl::item<Dims, /*Offset=*/false> LocalItem =
390  IDBuilder::createItem<Dims, false>(LocalSize, LocalID);
391  const sycl::nd_item<Dims> NDItem =
392  IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
393 
394  runKernelWithArg<const sycl::nd_item<Dims>>(MKernel, NDItem);
395  });
396  });
397  }
398 
399  template <typename ArgT = KernelArgType>
401  runOnHost(const NDRDescT &NDRDesc) {
402  sycl::range<Dims> NGroups(InitializedVal<Dims, range>::template get<0>());
403 
404  for (int I = 0; I < Dims; ++I) {
405  if (NDRDesc.LocalSize[I] == 0 ||
406  NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0)
407  throw sycl::nd_range_error("Invalid local size for global size",
408  PI_ERROR_INVALID_WORK_GROUP_SIZE);
409  NGroups[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
410  }
411 
412  sycl::range<Dims> LocalSize(InitializedVal<Dims, range>::template get<0>());
413  sycl::range<Dims> GlobalSize(
415  for (int I = 0; I < Dims; ++I) {
416  LocalSize[I] = NDRDesc.LocalSize[I];
417  GlobalSize[I] = NDRDesc.GlobalSize[I];
418  }
419  detail::NDLoop<Dims>::iterate(NGroups, [&](const id<Dims> &GroupID) {
420  sycl::group<Dims> Group =
421  IDBuilder::createGroup<Dims>(GlobalSize, LocalSize, NGroups, GroupID);
422  runKernelWithArg<sycl::group<Dims>>(MKernel, Group);
423  });
424  }
425 
426  ~HostKernel() = default;
427 };
428 
429 } // namespace detail
430 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
431 } // namespace sycl
ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size, int Index)
Definition: cg_types.hpp:29
sycl::detail::kernel_param_kind_t MType
Definition: cg_types.hpp:33
virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI)=0
detail::enable_if_t< std::is_same< ArgT, nd_item< Dims > >::value > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:360
detail::enable_if_t< std::is_same< ArgT, item< Dims, false > >::value > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:317
detail::enable_if_t< std::is_same< ArgT, item< Dims, true > >::value > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:335
enable_if_t< std::is_same< ArgT, sycl::group< Dims > >::value > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:401
HostKernel(KernelType Kernel)
Definition: cg_types.hpp:253
char * getPtr() override
Definition: cg_types.hpp:281
detail::enable_if_t< std::is_same< ArgT, void >::value > runOnHost(const NDRDescT &)
Definition: cg_types.hpp:285
void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override
Definition: cg_types.hpp:254
detail::enable_if_t< std::is_same< ArgT, sycl::id< Dims > >::value > runOnHost(const NDRDescT &NDRDesc)
Definition: cg_types.hpp:291
Profiling info for the host execution.
void end()
Measures event's end time.
Definition: event_impl.cpp:353
void start()
Measures event's start time.
Definition: event_impl.cpp:351
void call(interop_handle handle)
Definition: cg_types.hpp:240
HostTask(std::function< void()> &&Func)
Definition: cg_types.hpp:234
HostTask(std::function< void(interop_handle)> &&Func)
Definition: cg_types.hpp:235
InteropTask(std::function< void(sycl::interop_handler)> Func)
Definition: cg_types.hpp:224
void call(sycl::interop_handler &h)
Definition: cg_types.hpp:225
sycl::range< 3 > GlobalSize
Definition: cg_types.hpp:116
sycl::range< 3 > NumWorkGroups
Number of workgroups, used to record the number of workgroups from the simplest form of parallel_for_...
Definition: cg_types.hpp:122
void setNumWorkGroups(sycl::range< Dims_ > N)
Definition: cg_types.hpp:104
void set(int Dims_, sycl::nd_range< 3 > ExecutionRange)
Definition: cg_types.hpp:93
void set(sycl::range< Dims_ > NumWorkItems, sycl::id< Dims_ > Offset)
Definition: cg_types.hpp:71
sycl::id< 3 > GlobalOffset
Definition: cg_types.hpp:118
sycl::range< 3 > LocalSize
Definition: cg_types.hpp:117
void set(sycl::nd_range< Dims_ > ExecutionRange)
Definition: cg_types.hpp:82
void set(sycl::range< Dims_ > NumWorkItems)
Definition: cg_types.hpp:57
Identifies an instance of the function object executing at each point in a range.
Definition: item.hpp:39
Defines the iteration domain of both the work-groups and the overall dispatch.
Definition: nd_range.hpp:23
#define __SYCL_INLINE_VER_NAMESPACE(X)
constexpr bool isKernelLambdaCallableWithKernelHandlerImpl()
Definition: cg_types.hpp:160
std::enable_if_t< !KernelLambdaHasKernelHandlerArgT< KernelType, ArgType >::value > runKernelWithArg(KernelType KernelName, ArgType Arg)
Definition: cg_types.hpp:205
std::enable_if_t<!KernelLambdaHasKernelHandlerArgT< KernelType >::value > runKernelWithoutArg(KernelType KernelName)
Definition: cg_types.hpp:190
typename std::enable_if< B, T >::type enable_if_t
static constexpr bool check_kernel_lambda_takes_args()
Definition: cg_types.hpp:148
---— Error handling, matching OpenCL plugin semantics.
Definition: access.hpp:14
Generates an NDims-dimensional perfect loop nest.
Definition: common.hpp:325