DPC++ Runtime
Runtime libraries for oneAPI DPC++
handler.cpp
//==-------- handler.cpp --- SYCL command group handler --------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <algorithm>

#include <detail/config.hpp>
#include <detail/handler_impl.hpp>
#include <detail/kernel_impl.hpp>
#include <detail/queue_impl.hpp>
#include <sycl/detail/common.hpp>
#include <sycl/detail/helpers.hpp>
#include <sycl/detail/pi.h>
#include <sycl/detail/pi.hpp>
#include <sycl/event.hpp>
#include <sycl/handler.hpp>
#include <sycl/info/info_desc.hpp>
#include <sycl/stream.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {

handler::handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost)
    : handler(Queue, Queue, nullptr, IsHost) {}

handler::handler(std::shared_ptr<detail::queue_impl> Queue,
                 std::shared_ptr<detail::queue_impl> PrimaryQueue,
                 std::shared_ptr<detail::queue_impl> SecondaryQueue,
                 bool IsHost)
    : MImpl(std::make_shared<detail::handler_impl>(std::move(PrimaryQueue),
                                                   std::move(SecondaryQueue))),
      MQueue(std::move(Queue)), MIsHost(IsHost) {}

// Sets the submission state to indicate that an explicit kernel bundle has
// been set. Throws a sycl::exception with errc::invalid if the current state
// indicates that a specialization constant has been set.
void handler::setStateExplicitKernelBundle() {
  MImpl->setStateExplicitKernelBundle();
}

// Sets the submission state to indicate that a specialization constant has
// been set. Throws a sycl::exception with errc::invalid if the current state
// indicates that an explicit kernel bundle has been set.
void handler::setStateSpecConstSet() { MImpl->setStateSpecConstSet(); }

// Returns true if the submission state is EXPLICIT_KERNEL_BUNDLE_STATE and
// false otherwise.
bool handler::isStateExplicitKernelBundle() const {
  return MImpl->isStateExplicitKernelBundle();
}
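
// Illustrative sketch (not part of the runtime): per the comments above, the
// two submission states are mutually exclusive, so mixing
// set_specialization_constant() with use_kernel_bundle() in a single command
// group throws errc::invalid. Q, ExecBundle and MySpecConst are hypothetical.
//
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.set_specialization_constant<MySpecConst>(42); // spec-const state
//     CGH.use_kernel_bundle(ExecBundle); // throws: conflicting bundle state
//   });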

// Returns a shared_ptr to the kernel_bundle. If no kernel_bundle has been
// created yet:
//   - returns a newly created kernel_bundle if Insert is true;
//   - returns shared_ptr(nullptr) if Insert is false.
std::shared_ptr<detail::kernel_bundle_impl>
handler::getOrInsertHandlerKernelBundle(bool Insert) const {
  if (!MImpl->MKernelBundle && Insert) {
    MImpl->MKernelBundle =
        detail::getSyclObjImpl(get_kernel_bundle<bundle_state::input>(
            MQueue->get_context(), {MQueue->get_device()}, {}));
  }
  return MImpl->MKernelBundle;
}

// Sets the kernel bundle to the provided one.
void handler::setHandlerKernelBundle(
    const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr) {
  MImpl->MKernelBundle = NewKernelBundleImpPtr;
}

void handler::setHandlerKernelBundle(kernel Kernel) {
  // Kernel may not have an associated kernel bundle if it is created from a
  // program. As such, apply getSyclObjImpl directly on the kernel, i.e. not
  // the other way around: getSyclObjImpl(Kernel->get_kernel_bundle()).
  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpl =
      detail::getSyclObjImpl(Kernel)->get_kernel_bundle();
  setHandlerKernelBundle(KernelBundleImpl);
}

event handler::finalize() {
  // This block of code is needed only for the reduction implementation.
  // It is harmless (does nothing) for everything else.
  if (MIsFinalized)
    return MLastEvent;
  MIsFinalized = true;

  const auto &type = getType();
  if (type == detail::CG::Kernel) {
    // If there were any uses of set_specialization_constant, build the
    // kernel_bundle.
    std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr =
        getOrInsertHandlerKernelBundle(/*Insert=*/false);
    if (KernelBundleImpPtr) {
      // Make sure implicit non-interop kernel bundles have the kernel.
      if (!KernelBundleImpPtr->isInterop() &&
          !MImpl->isStateExplicitKernelBundle()) {
        kernel_id KernelID =
            detail::ProgramManager::getInstance().getSYCLKernelID(MKernelName);
        bool KernelInserted =
            KernelBundleImpPtr->add_kernel(KernelID, MQueue->get_device());
        // If the kernel was not inserted and the bundle is in input mode, try
        // building the bundle and looking the kernel up in executable mode.
        if (!KernelInserted &&
            KernelBundleImpPtr->get_bundle_state() == bundle_state::input) {
          auto KernelBundle =
              detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
                  KernelBundleImpPtr);
          kernel_bundle<bundle_state::executable> ExecKernelBundle =
              build(KernelBundle);
          KernelBundleImpPtr = detail::getSyclObjImpl(ExecKernelBundle);
          setHandlerKernelBundle(KernelBundleImpPtr);
          KernelInserted =
              KernelBundleImpPtr->add_kernel(KernelID, MQueue->get_device());
        }
        // If the kernel was not found in executable mode either, throw.
        if (!KernelInserted)
          throw sycl::exception(make_error_code(errc::runtime),
                                "Failed to add kernel to kernel bundle.");
      }

      switch (KernelBundleImpPtr->get_bundle_state()) {
      case bundle_state::input: {
        // The underlying level expects the kernel_bundle to be in executable
        // state.
        kernel_bundle<bundle_state::executable> ExecBundle = build(
            detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
                KernelBundleImpPtr));
        KernelBundleImpPtr = detail::getSyclObjImpl(ExecBundle);
        setHandlerKernelBundle(KernelBundleImpPtr);
        break;
      }
      case bundle_state::executable:
        // Nothing to do.
        break;
      case bundle_state::object:
        assert(0 && "Expected that the bundle is either in input or executable "
                    "states.");
        break;
      }
    }

    if (!MQueue->is_in_fusion_mode() &&
        MRequirements.size() + MEvents.size() + MStreamStorage.size() == 0) {
      // If the user has not added any new dependency to the dependency graph,
      // i.e. the graph is unchanged, and the queue is not in fusion mode, use
      // this faster path to submit the kernel, bypassing the scheduler and
      // avoiding the creation of CommandGroup and Command objects.

      std::vector<RT::PiEvent> RawEvents;
      detail::EventImplPtr NewEvent;
      RT::PiEvent *OutEvent = nullptr;

      auto EnqueueKernel = [&]() {
        // 'Result' is used for a single point of return.
        pi_int32 Result = PI_ERROR_INVALID_VALUE;

        if (MQueue->is_host()) {
          MHostKernel->call(MNDRDesc, (NewEvent)
                                          ? NewEvent->getHostProfilingInfo()
                                          : nullptr);
          Result = PI_SUCCESS;
        } else {
          if (MQueue->getPlugin().getBackend() ==
              backend::ext_intel_esimd_emulator) {
            MQueue->getPlugin().call<detail::PiApiKind::piEnqueueKernelLaunch>(
                nullptr, reinterpret_cast<pi_kernel>(MHostKernel->getPtr()),
                MNDRDesc.Dims, &MNDRDesc.GlobalOffset[0],
                &MNDRDesc.GlobalSize[0], &MNDRDesc.LocalSize[0], 0, nullptr,
                nullptr);
            Result = PI_SUCCESS;
          } else {
            Result = enqueueImpKernel(
                MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel,
                MKernelName, MOSModuleHandle, RawEvents, OutEvent, nullptr);
          }
        }
        return Result;
      };

      bool DiscardEvent = false;
      if (MQueue->has_discard_events_support()) {
        // The kernel can use assertions only if it is not an interop kernel.
        bool KernelUsesAssert =
            !(MKernel && MKernel->isInterop()) &&
            detail::ProgramManager::getInstance().kernelUsesAssert(
                MOSModuleHandle, MKernelName);
        DiscardEvent = !KernelUsesAssert;
      }

      if (DiscardEvent) {
        if (PI_SUCCESS != EnqueueKernel())
          throw runtime_error("Enqueue process failed.",
                              PI_ERROR_INVALID_OPERATION);
      } else {
        NewEvent = std::make_shared<detail::event_impl>(MQueue);
        NewEvent->setContextImpl(MQueue->getContextImplPtr());
        NewEvent->setStateIncomplete();
        OutEvent = &NewEvent->getHandleRef();

        NewEvent->setSubmissionTime();

        if (PI_SUCCESS != EnqueueKernel())
          throw runtime_error("Enqueue process failed.",
                              PI_ERROR_INVALID_OPERATION);
        else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr)
          NewEvent->setComplete();

        MLastEvent = detail::createSyclObjFromImpl<event>(NewEvent);
      }
      return MLastEvent;
    }
  }

  std::unique_ptr<detail::CG> CommandGroup;
  switch (type) {
  case detail::CG::Kernel:
  case detail::CG::RunOnHostIntel: {
    // The kernel name is copied here rather than moved so that it remains
    // available after this method has run; the reductions implementation
    // relies on that, and it lets the assert feature check whether the kernel
    // uses assertions.
    CommandGroup.reset(new detail::CGExecKernel(
        std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel),
        std::move(MImpl->MKernelBundle), std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), std::move(MArgs),
        MKernelName, MOSModuleHandle, std::move(MStreamStorage),
        std::move(MImpl->MAuxiliaryResources), MCGType, MCodeLoc));
    break;
  }
  case detail::CG::CodeplayInteropTask:
    CommandGroup.reset(new detail::CGInteropTask(
        std::move(MInteropTask), std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCGType, MCodeLoc));
    break;
  case detail::CG::CopyAccToPtr:
  case detail::CG::CopyPtrToAcc:
  case detail::CG::CopyAccToAcc:
    CommandGroup.reset(new detail::CGCopy(
        MCGType, MSrcPtr, MDstPtr, std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::Fill:
    CommandGroup.reset(new detail::CGFill(
        std::move(MPattern), MDstPtr, std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::UpdateHost:
    CommandGroup.reset(new detail::CGUpdateHost(
        MDstPtr, std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::CopyUSM:
    CommandGroup.reset(new detail::CGCopyUSM(
        MSrcPtr, MDstPtr, MLength, std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::FillUSM:
    CommandGroup.reset(new detail::CGFillUSM(
        std::move(MPattern), MDstPtr, MLength, std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::PrefetchUSM:
    CommandGroup.reset(new detail::CGPrefetchUSM(
        MDstPtr, MLength, std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::AdviseUSM:
    CommandGroup.reset(new detail::CGAdviseUSM(
        MDstPtr, MLength, MImpl->MAdvice, std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCGType, MCodeLoc));
    break;
  case detail::CG::Copy2DUSM:
    CommandGroup.reset(new detail::CGCopy2DUSM(
        MSrcPtr, MDstPtr, MImpl->MSrcPitch, MImpl->MDstPitch, MImpl->MWidth,
        MImpl->MHeight, std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::Fill2DUSM:
    CommandGroup.reset(new detail::CGFill2DUSM(
        std::move(MPattern), MDstPtr, MImpl->MDstPitch, MImpl->MWidth,
        MImpl->MHeight, std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::Memset2DUSM:
    CommandGroup.reset(new detail::CGMemset2DUSM(
        MPattern[0], MDstPtr, MImpl->MDstPitch, MImpl->MWidth, MImpl->MHeight,
        std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCodeLoc));
    break;
  case detail::CG::CodeplayHostTask:
    CommandGroup.reset(new detail::CGHostTask(
        std::move(MHostTask), MQueue, MQueue->getContextImplPtr(),
        std::move(MArgs), std::move(MArgsStorage), std::move(MAccStorage),
        std::move(MSharedPtrStorage), std::move(MRequirements),
        std::move(MEvents), MCGType, MCodeLoc));
    break;
  case detail::CG::Barrier:
  case detail::CG::BarrierWaitlist:
    CommandGroup.reset(new detail::CGBarrier(
        std::move(MEventsWaitWithBarrier), std::move(MArgsStorage),
        std::move(MAccStorage), std::move(MSharedPtrStorage),
        std::move(MRequirements), std::move(MEvents), MCGType, MCodeLoc));
    break;
  case detail::CG::None:
    if (detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL)) {
      std::cout << "WARNING: An empty command group is submitted." << std::endl;
    }
    detail::EventImplPtr Event = std::make_shared<sycl::detail::event_impl>();
    MLastEvent = detail::createSyclObjFromImpl<event>(Event);
    return MLastEvent;
  }

  if (!CommandGroup)
    throw sycl::runtime_error(
        "Internal Error. Command group cannot be constructed.",
        PI_ERROR_INVALID_OPERATION);

  detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG(
      std::move(CommandGroup), std::move(MQueue));

  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
  return MLastEvent;
}
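
// Illustrative sketch (not part of the runtime): the fast path in finalize()
// above can be taken for a plain kernel submission that adds no requirements,
// events, or streams; combined with the discard_events queue property the
// resulting event may never be materialized. Q, N and MyKernel are
// hypothetical.
//
//   sycl::queue Q{sycl::property_list{
//       sycl::property::queue::in_order{},
//       sycl::ext::oneapi::property::queue::discard_events{}}};
//   Q.parallel_for<MyKernel>(sycl::range<1>{N}, [=](sycl::id<1>) {});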

void handler::addReduction(const std::shared_ptr<const void> &ReduObj) {
  MImpl->MAuxiliaryResources.push_back(ReduObj);
}

void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
                                   access::target AccTarget) {
  detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);
  detail::Requirement *Req = AccImpl.get();
  // Add the accessor to the list of requirements.
  MRequirements.push_back(Req);
  // Store a copy of the accessor.
  MAccStorage.push_back(std::move(AccImpl));
  // Add the accessor to the handler's list of associated accessors. For
  // associated accessors the index carries no meaning.
  MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor,
                                   Req, static_cast<int>(AccTarget),
                                   /*index*/ 0);
}
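
// Illustrative sketch (not part of the runtime): associateWithHandler() is
// reached when a buffer accessor is bound to a command group, as below.
// Buf, Q, N and MyKernel are hypothetical.
//
//   sycl::buffer<int, 1> Buf{sycl::range<1>{N}};
//   Q.submit([&](sycl::handler &CGH) {
//     sycl::accessor Acc{Buf, CGH, sycl::read_write}; // adds a Requirement
//     CGH.parallel_for<MyKernel>(sycl::range<1>{N},
//                                [=](sycl::id<1> I) { Acc[I] += 1; });
//   });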

static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index,
                                     size_t &IndexShift, int Size,
                                     bool IsKernelCreatedFromSource,
                                     size_t GlobalSize,
                                     std::vector<detail::ArgDesc> &Args,
                                     bool isESIMD) {
  using detail::kernel_param_kind_t;
  if (AccImpl->PerWI)
    AccImpl->resize(GlobalSize);

  Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size,
                    Index + IndexShift);

  // TODO: ESIMD currently does not support offset, memory and access ranges -
  // accessor::init for an ESIMD-mode accessor has a single field, translated
  // to the single kernel argument set above.
  if (!isESIMD && !IsKernelCreatedFromSource) {
    // Dimensionality of the buffer is 1 when dimensionality of the
    // accessor is 0.
    const size_t SizeAccField =
        sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims);
    ++IndexShift;
    Args.emplace_back(kernel_param_kind_t::kind_std_layout,
                      &AccImpl->MAccessRange[0], SizeAccField,
                      Index + IndexShift);
    ++IndexShift;
    Args.emplace_back(kernel_param_kind_t::kind_std_layout,
                      &AccImpl->MMemoryRange[0], SizeAccField,
                      Index + IndexShift);
    ++IndexShift;
    Args.emplace_back(kernel_param_kind_t::kind_std_layout,
                      &AccImpl->MOffset[0], SizeAccField, Index + IndexShift);
  }
}
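
// Worked example: for a non-ESIMD 1-D global accessor arriving at kernel
// argument Index = 2 with IndexShift = 0, the helper above emits four runtime
// arguments and leaves IndexShift = 3:
//   arg 2: kind_accessor   - the accessor itself
//   arg 3: kind_std_layout - MAccessRange (1 * sizeof(size_t) bytes)
//   arg 4: kind_std_layout - MMemoryRange (1 * sizeof(size_t) bytes)
//   arg 5: kind_std_layout - MOffset     (1 * sizeof(size_t) bytes)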

void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
                         const int Size, const size_t Index, size_t &IndexShift,
                         bool IsKernelCreatedFromSource, bool IsESIMD) {
  using detail::kernel_param_kind_t;

  switch (Kind) {
  case kernel_param_kind_t::kind_std_layout:
  case kernel_param_kind_t::kind_pointer: {
    MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift);
    break;
  }
  case kernel_param_kind_t::kind_stream: {
    // A stream contains several accessors inside.
    stream *S = static_cast<stream *>(Ptr);

    detail::AccessorBaseHost *GBufBase =
        static_cast<detail::AccessorBaseHost *>(&S->GlobalBuf);
    detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase);
    detail::Requirement *GBufReq = GBufImpl.get();
    addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size,
                             IsKernelCreatedFromSource,
                             MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
    ++IndexShift;
    detail::AccessorBaseHost *GOffsetBase =
        static_cast<detail::AccessorBaseHost *>(&S->GlobalOffset);
    detail::AccessorImplPtr GOffsetImpl = detail::getSyclObjImpl(*GOffsetBase);
    detail::Requirement *GOffsetReq = GOffsetImpl.get();
    addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size,
                             IsKernelCreatedFromSource,
                             MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
    ++IndexShift;
    detail::AccessorBaseHost *GFlushBase =
        static_cast<detail::AccessorBaseHost *>(&S->GlobalFlushBuf);
    detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase);
    detail::Requirement *GFlushReq = GFlushImpl.get();

    size_t GlobalSize = MNDRDesc.GlobalSize.size();
    // If the work-group size was not set explicitly, it must be received from
    // a kernel attribute or set to default values. For now we cannot get that
    // attribute here, so we simply assume the work-group size is always the
    // default for streams.
    // TODO: adjust MNDRDesc when the device image contains the kernel's
    // attribute.
    if (GlobalSize == 0) {
      // Suppose that the work-group size is 1 for every dimension.
      GlobalSize = MNDRDesc.NumWorkGroups.size();
    }
    addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size,
                             IsKernelCreatedFromSource, GlobalSize, MArgs,
                             IsESIMD);
    ++IndexShift;
    MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
                       &S->FlushBufferSize, sizeof(S->FlushBufferSize),
                       Index + IndexShift);

    break;
  }
  case kernel_param_kind_t::kind_accessor: {
    // For arguments of accessor kind, Size carries information about the
    // accessor: its first 11 bits encode the accessor target.
    const access::target AccTarget = static_cast<access::target>(Size & 0x7ff);
    switch (AccTarget) {
    case access::target::device:
    case access::target::constant_buffer: {
      detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
      addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size,
                               IsKernelCreatedFromSource,
                               MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
      break;
    }
    case access::target::local: {
      detail::LocalAccessorImplHost *LAcc =
          static_cast<detail::LocalAccessorImplHost *>(Ptr);

      range<3> &Size = LAcc->MSize;
      const int Dims = LAcc->MDims;
      int SizeInBytes = LAcc->MElemSize;
      for (int I = 0; I < Dims; ++I)
        SizeInBytes *= Size[I];
      // Some backends do not accept zero-sized local memory arguments, so we
      // make it a minimum allocation of 1 byte.
      SizeInBytes = std::max(SizeInBytes, 1);
      MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr,
                         SizeInBytes, Index + IndexShift);
      // Mirror addArgsForGlobalAccessor: pass the three range fields unless
      // the kernel was created from source.
      if (!IsKernelCreatedFromSource) {
        ++IndexShift;
        const size_t SizeAccField = Dims * sizeof(Size[0]);
        MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
                           SizeAccField, Index + IndexShift);
        ++IndexShift;
        MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
                           SizeAccField, Index + IndexShift);
        ++IndexShift;
        MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
                           SizeAccField, Index + IndexShift);
      }
      break;
    }
    case access::target::image:
    case access::target::image_array: {
      detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
      MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
      if (!IsKernelCreatedFromSource) {
        // TODO: Handle additional kernel arguments for the image class if the
        // compiler front-end adds them.
      }
      break;
    }
    case access::target::host_image:
    case access::target::host_buffer: {
      throw sycl::invalid_parameter_error("Unsupported accessor target case.",
                                          PI_ERROR_INVALID_OPERATION);
      break;
    }
    }
    break;
  }
  case kernel_param_kind_t::kind_sampler: {
    MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler),
                       Index + IndexShift);
    break;
  }
  case kernel_param_kind_t::kind_specialization_constants_buffer: {
    MArgs.emplace_back(
        kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size,
        Index + IndexShift);
    break;
  }
  case kernel_param_kind_t::kind_invalid:
    throw runtime_error("Invalid kernel param kind", PI_ERROR_INVALID_VALUE);
    break;
  }
}
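
// Worked example: Size & 0x7ff keeps the low 11 bits of the packed accessor
// descriptor. For a hypothetical packed value Size = 0x1234, the extracted
// target bits are 0x1234 & 0x7ff = 0x234; the higher bits carry the rest of
// the accessor information.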

// An argument can take up more space to store additional information about
// MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor.
// We use the worst-case estimate because the lifetime of the vector is short.
// In processArg the kind_stream case introduces the maximum number of
// additional arguments: its three global accessors contribute four arguments
// each and the flush-buffer size adds one more (13 in total), i.e. 12
// additional arguments on top of the one being processed, hence the
// worst-case estimate of 12 + 1 = 13.
// TODO: this constant can be removed if the size of MArgs is calculated at
// compile time.
inline constexpr size_t MaxNumAdditionalArgs = 13;

void handler::extractArgsAndReqs() {
  assert(MKernel && "MKernel is not initialized");
  std::vector<detail::ArgDesc> UnPreparedArgs = std::move(MArgs);
  MArgs.clear();

  std::sort(
      UnPreparedArgs.begin(), UnPreparedArgs.end(),
      [](const detail::ArgDesc &first, const detail::ArgDesc &second) -> bool {
        return (first.MIndex < second.MIndex);
      });

  const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource();
  MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size());

  size_t IndexShift = 0;
  for (size_t I = 0; I < UnPreparedArgs.size(); ++I) {
    void *Ptr = UnPreparedArgs[I].MPtr;
    const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType;
    const int &Size = UnPreparedArgs[I].MSize;
    const int Index = UnPreparedArgs[I].MIndex;
    processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
               false);
  }
}

void handler::extractArgsAndReqsFromLambda(
    char *LambdaPtr, size_t KernelArgsNum,
    const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) {
  const bool IsKernelCreatedFromSource = false;
  size_t IndexShift = 0;
  MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum);

  for (size_t I = 0; I < KernelArgsNum; ++I) {
    void *Ptr = LambdaPtr + KernelArgs[I].offset;
    const detail::kernel_param_kind_t &Kind = KernelArgs[I].kind;
    const int &Size = KernelArgs[I].info;
    if (Kind == detail::kernel_param_kind_t::kind_accessor) {
      // For arguments of accessor kind, Size carries information about the
      // accessor: its first 11 bits encode the accessor target.
      const access::target AccTarget =
          static_cast<access::target>(Size & 0x7ff);
      if ((AccTarget == access::target::device ||
           AccTarget == access::target::constant_buffer) ||
          (AccTarget == access::target::image ||
           AccTarget == access::target::image_array)) {
        detail::AccessorBaseHost *AccBase =
            static_cast<detail::AccessorBaseHost *>(Ptr);
        Ptr = detail::getSyclObjImpl(*AccBase).get();
      } else if (AccTarget == access::target::local) {
        detail::LocalAccessorBaseHost *LocalAccBase =
            static_cast<detail::LocalAccessorBaseHost *>(Ptr);
        Ptr = detail::getSyclObjImpl(*LocalAccBase).get();
      }
    }
    processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource,
               IsESIMD);
  }
}

// Calling methods of kernel_impl requires knowledge of the class layout. As
// this is impossible in a header, this function calls the necessary method
// inside the library and returns the result.
std::string handler::getKernelName() {
  return MKernel->get_info<info::kernel::function_name>();
}

void handler::verifyUsedKernelBundle(const std::string &KernelName) {
  auto UsedKernelBundleImplPtr =
      getOrInsertHandlerKernelBundle(/*Insert=*/false);
  if (!UsedKernelBundleImplPtr)
    return;

  // Implicit kernel bundles are populated late, so we ignore them here.
  if (!MImpl->isStateExplicitKernelBundle())
    return;

  kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
  device Dev = detail::getDeviceFromHandler(*this);
  if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
    throw sycl::exception(
        make_error_code(errc::kernel_not_supported),
        "The kernel bundle in use does not contain the kernel");
}

void handler::ext_oneapi_barrier(const std::vector<event> &WaitList) {
  throwIfActionIsCreated();
  MCGType = detail::CG::BarrierWaitlist;
  MEventsWaitWithBarrier.resize(WaitList.size());
  std::transform(
      WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(),
      [](const event &Event) { return detail::getSyclObjImpl(Event); });
}

__SYCL2020_DEPRECATED("use 'ext_oneapi_barrier' instead")
void handler::barrier(const std::vector<event> &WaitList) {
  handler::ext_oneapi_barrier(WaitList);
}
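
// Illustrative sketch (not part of the runtime): a barrier with a wait list
// orders all later submissions to its queue after the listed events complete.
// Q1, Q2 and K1 are hypothetical.
//
//   sycl::event E =
//       Q1.submit([&](sycl::handler &CGH) { CGH.single_task<K1>([] {}); });
//   Q2.submit([&](sycl::handler &CGH) { CGH.ext_oneapi_barrier({E}); });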

using namespace sycl::detail;
bool handler::DisableRangeRounding() {
  return SYCLConfig<SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING>::get();
}

bool handler::RangeRoundingTrace() {
  return SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE>::get();
}

void handler::GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
                                       size_t &MinRange) {
  SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>::GetSettings(
      MinFactor, GoodFactor, MinRange);
}

void handler::memcpy(void *Dest, const void *Src, size_t Count) {
  throwIfActionIsCreated();
  MSrcPtr = const_cast<void *>(Src);
  MDstPtr = Dest;
  MLength = Count;
  setType(detail::CG::CopyUSM);
}

void handler::memset(void *Dest, int Value, size_t Count) {
  throwIfActionIsCreated();
  MDstPtr = Dest;
  MPattern.push_back(static_cast<char>(Value));
  MLength = Count;
  setType(detail::CG::FillUSM);
}

void handler::prefetch(const void *Ptr, size_t Count) {
  throwIfActionIsCreated();
  MDstPtr = const_cast<void *>(Ptr);
  MLength = Count;
  setType(detail::CG::PrefetchUSM);
}

void handler::mem_advise(const void *Ptr, size_t Count, int Advice) {
  throwIfActionIsCreated();
  MDstPtr = const_cast<void *>(Ptr);
  MLength = Count;
  MImpl->MAdvice = static_cast<pi_mem_advice>(Advice);
  setType(detail::CG::AdviseUSM);
}
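
// Illustrative sketch (not part of the runtime): the USM setters above are
// what queue-level shortcuts ultimately drive. Assuming an in-order queue Q;
// P, Dst and N are hypothetical.
//
//   int *P = sycl::malloc_device<int>(N, Q);
//   Q.submit([&](sycl::handler &CGH) { CGH.memset(P, 0, N * sizeof(int)); });
//   Q.submit([&](sycl::handler &CGH) { CGH.prefetch(P, N * sizeof(int)); });
//   Q.submit([&](sycl::handler &CGH) { CGH.memcpy(Dst, P, N * sizeof(int)); });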

void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch,
                                       const void *Src, size_t SrcPitch,
                                       size_t Width, size_t Height) {
  // Checks done in callers.
  MSrcPtr = const_cast<void *>(Src);
  MDstPtr = Dest;
  MImpl->MSrcPitch = SrcPitch;
  MImpl->MDstPitch = DestPitch;
  MImpl->MWidth = Width;
  MImpl->MHeight = Height;
  setType(detail::CG::Copy2DUSM);
}

void handler::ext_oneapi_fill2d_impl(void *Dest, size_t DestPitch,
                                     const void *Value, size_t ValueSize,
                                     size_t Width, size_t Height) {
  // Checks done in callers.
  MDstPtr = Dest;
  MPattern.resize(ValueSize);
  std::memcpy(MPattern.data(), Value, ValueSize);
  MImpl->MDstPitch = DestPitch;
  MImpl->MWidth = Width;
  MImpl->MHeight = Height;
  setType(detail::CG::Fill2DUSM);
}

void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value,
                                       size_t Width, size_t Height) {
  // Checks done in callers.
  MDstPtr = Dest;
  MPattern.push_back(static_cast<char>(Value));
  MImpl->MDstPitch = DestPitch;
  MImpl->MWidth = Width;
  MImpl->MHeight = Height;
  setType(detail::CG::Memset2DUSM);
}
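
// Illustrative sketch (not part of the runtime), per the
// sycl_ext_oneapi_memcpy2d extension: copying a pitched 2-D region through
// the queue shortcut funnels into ext_oneapi_memcpy2d_impl() above. All
// pointer/pitch names are hypothetical; Width and pitches are in bytes.
//
//   Q.ext_oneapi_memcpy2d(DstPtr, DstPitchBytes, SrcPtr, SrcPitchBytes,
//                         WidthBytes, Height);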

void handler::use_kernel_bundle(
    const kernel_bundle<bundle_state::executable> &ExecBundle) {

  std::shared_ptr<detail::queue_impl> PrimaryQueue =
      MImpl->MSubmissionPrimaryQueue;
  if (PrimaryQueue->get_context() != ExecBundle.get_context())
    throw sycl::exception(
        make_error_code(errc::invalid),
        "Context associated with the primary queue is different from the "
        "context associated with the kernel bundle");

  std::shared_ptr<detail::queue_impl> SecondaryQueue =
      MImpl->MSubmissionSecondaryQueue;
  if (SecondaryQueue &&
      SecondaryQueue->get_context() != ExecBundle.get_context())
    throw sycl::exception(
        make_error_code(errc::invalid),
        "Context associated with the secondary queue is different from the "
        "context associated with the kernel bundle");

  setStateExplicitKernelBundle();
  setHandlerKernelBundle(detail::getSyclObjImpl(ExecBundle));
}
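
// Illustrative sketch (not part of the runtime): typical user-side flow that
// reaches use_kernel_bundle(); the bundle must come from the same context as
// the queue, or the checks above throw errc::invalid. Q and MyKernel are
// hypothetical.
//
//   auto Bundle = sycl::get_kernel_bundle<sycl::bundle_state::executable>(
//       Q.get_context());
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.use_kernel_bundle(Bundle);
//     CGH.single_task<MyKernel>([] {});
//   });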

void handler::depends_on(event Event) {
  auto EventImpl = detail::getSyclObjImpl(Event);
  if (EventImpl->isDiscarded()) {
    throw sycl::exception(make_error_code(errc::invalid),
                          "Queue operation cannot depend on discarded event.");
  }
  MEvents.push_back(EventImpl);
}

void handler::depends_on(const std::vector<event> &Events) {
  for (const event &Event : Events) {
    auto EventImpl = detail::getSyclObjImpl(Event);
    if (EventImpl->isDiscarded()) {
      throw sycl::exception(
          make_error_code(errc::invalid),
          "Queue operation cannot depend on discarded event.");
    }
    MEvents.push_back(EventImpl);
  }
}
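
// Illustrative sketch (not part of the runtime): an explicit dependency chain
// through depends_on(); the producer of E1 must finish before the consumer
// starts. Q is hypothetical.
//
//   sycl::event E1 = Q.submit([&](sycl::handler &CGH) { /* producer */ });
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.depends_on(E1); // throws errc::invalid if E1 was discarded
//     /* consumer */
//   });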

static bool
checkContextSupports(const std::shared_ptr<detail::context_impl> &ContextImpl,
                     detail::RT::PiContextInfo InfoQuery) {
  auto &Plugin = ContextImpl->getPlugin();
  pi_bool SupportsOp = false;
  Plugin.call<detail::PiApiKind::piContextGetInfo>(ContextImpl->getHandleRef(),
                                                   InfoQuery, sizeof(pi_bool),
                                                   &SupportsOp, nullptr);
  return SupportsOp;
}

bool handler::supportsUSMMemcpy2D() {
  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
       {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
    if (QueueImpl &&
        !checkContextSupports(QueueImpl->getContextImplPtr(),
                              PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT))
      return false;
  }
  return true;
}

bool handler::supportsUSMFill2D() {
  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
       {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
    if (QueueImpl &&
        !checkContextSupports(QueueImpl->getContextImplPtr(),
                              PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT))
      return false;
  }
  return true;
}

bool handler::supportsUSMMemset2D() {
  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
       {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
    if (QueueImpl &&
        !checkContextSupports(QueueImpl->getContextImplPtr(),
                              PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT))
      return false;
  }
  return true;
}

id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) {
  device Dev = MQueue->get_device();
  id<2> ItemLimit = Dev.get_info<info::device::max_work_item_sizes<2>>() *
                    Dev.get_info<info::device::max_compute_units>();
  return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)};
}
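
// Worked example: with max_work_item_sizes<2> = {1024, 1024} and
// max_compute_units = 4, ItemLimit = {4096, 4096}; a fallback region of
// Width = 8192, Height = 2048 is then clamped to id<2>{2048, 4096}.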

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl