DPC++ Runtime
Runtime libraries for oneAPI DPC++
handler.cpp
//==-------- handler.cpp --- SYCL command group handler --------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <algorithm>

#include <detail/config.hpp>
#include <detail/global_handler.hpp>
#include <detail/graph_impl.hpp>
#include <detail/handler_impl.hpp>
#include <detail/image_impl.hpp>
#include <detail/kernel_bundle_impl.hpp>
#include <detail/kernel_impl.hpp>
#include <detail/queue_impl.hpp>
#include <detail/scheduler/commands.hpp>
#include <detail/scheduler/scheduler.hpp>
#include <detail/usm/usm_impl.hpp>
#include <sycl/detail/common.hpp>
#include <sycl/detail/helpers.hpp>
#include <sycl/detail/kernel_desc.hpp>
#include <sycl/detail/pi.h>
#include <sycl/detail/pi.hpp>
#include <sycl/event.hpp>
#include <sycl/handler.hpp>
#include <sycl/info/info_desc.hpp>
#include <sycl/stream.hpp>

namespace sycl {
inline namespace _V1 {

namespace detail {

bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr) {
  DeviceGlobalMapEntry *DGEntry =
      detail::ProgramManager::getInstance().getDeviceGlobalEntry(
          DeviceGlobalPtr);
  return DGEntry && !DGEntry->MImageIdentifiers.empty();
}

sycl::detail::pi::PiImageCopyFlags
getPiImageCopyFlags(sycl::usm::alloc SrcPtrType, sycl::usm::alloc DstPtrType) {
  if (DstPtrType == sycl::usm::alloc::device) {
    // Dest is on device
    if (SrcPtrType == sycl::usm::alloc::device)
      return sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_DEVICE;
    if (SrcPtrType == sycl::usm::alloc::host ||
        SrcPtrType == sycl::usm::alloc::unknown)
      return sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE;
    throw sycl::exception(make_error_code(errc::invalid),
                          "Unknown copy source location");
  }
  if (DstPtrType == sycl::usm::alloc::host ||
      DstPtrType == sycl::usm::alloc::unknown) {
    // Dest is on host
    if (SrcPtrType == sycl::usm::alloc::device)
      return sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST;
    if (SrcPtrType == sycl::usm::alloc::host ||
        SrcPtrType == sycl::usm::alloc::unknown)
      throw sycl::exception(make_error_code(errc::invalid),
                            "Cannot copy image from host to host");
    throw sycl::exception(make_error_code(errc::invalid),
                          "Unknown copy source location");
  }
  throw sycl::exception(make_error_code(errc::invalid),
                        "Unknown copy destination location");
}
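
// Illustrative usage sketch (not part of the original file; names are
// example-only assumptions): how the mapping above is reached from user code.
// `get_pointer_type` classifies each pointer, and the resulting pair of
// sycl::usm::alloc values selects the copy direction.
//
//   sycl::queue Q;
//   sycl::context Ctx = Q.get_context();
//   constexpr size_t Size = 16;
//   char *DevPtr = sycl::malloc_device<char>(Size, Q); // usm::alloc::device
//   std::vector<char> HostBuf(Size);                   // usm::alloc::unknown
//   // host -> device  resolves to PI_IMAGE_COPY_HOST_TO_DEVICE,
//   // device -> host  resolves to PI_IMAGE_COPY_DEVICE_TO_HOST,
//   // host -> host    throws "Cannot copy image from host to host".
//   sycl::free(DevPtr, Q);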

void *getValueFromDynamicParameter(
    ext::oneapi::experimental::detail::dynamic_parameter_base
        &DynamicParamBase) {
  return sycl::detail::getSyclObjImpl(DynamicParamBase)->getValue();
}

} // namespace detail

handler::handler(std::shared_ptr<detail::queue_impl> Queue, bool IsHost)
    : handler(Queue, Queue, nullptr, IsHost) {}

handler::handler(std::shared_ptr<detail::queue_impl> Queue,
                 std::shared_ptr<detail::queue_impl> PrimaryQueue,
                 std::shared_ptr<detail::queue_impl> SecondaryQueue,
                 bool IsHost)
    : MImpl(std::make_shared<detail::handler_impl>(std::move(PrimaryQueue),
                                                   std::move(SecondaryQueue))),
      MQueue(std::move(Queue)), MIsHost(IsHost) {}

handler::handler(
    std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph)
    : MImpl(std::make_shared<detail::handler_impl>()), MGraph(Graph) {}

// Sets the submission state to indicate that an explicit kernel bundle has
// been set. Throws a sycl::exception with errc::invalid if the current state
// indicates that a specialization constant has been set.
void handler::setStateExplicitKernelBundle() {
  MImpl->setStateExplicitKernelBundle();
}

// Sets the submission state to indicate that a specialization constant has
// been set. Throws a sycl::exception with errc::invalid if the current state
// indicates that an explicit kernel bundle has been set.
void handler::setStateSpecConstSet() { MImpl->setStateSpecConstSet(); }

// Returns true if the submission state is EXPLICIT_KERNEL_BUNDLE_STATE and
// false otherwise.
bool handler::isStateExplicitKernelBundle() const {
  return MImpl->isStateExplicitKernelBundle();
}

// Returns a shared_ptr to the kernel_bundle.
// If there is no kernel_bundle created:
// returns newly created kernel_bundle if Insert is true
// returns shared_ptr(nullptr) if Insert is false
std::shared_ptr<detail::kernel_bundle_impl>
handler::getOrInsertHandlerKernelBundle(bool Insert) const {
  if (!MImpl->MKernelBundle && Insert) {
    auto Ctx = MGraph ? MGraph->getContext() : MQueue->get_context();
    auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
    MImpl->MKernelBundle = detail::getSyclObjImpl(
        get_kernel_bundle<bundle_state::input>(Ctx, {Dev}, {}));
  }
  return MImpl->MKernelBundle;
}

// Sets kernel bundle to the provided one.
void handler::setHandlerKernelBundle(
    const std::shared_ptr<detail::kernel_bundle_impl> &NewKernelBundleImpPtr) {
  MImpl->MKernelBundle = NewKernelBundleImpPtr;
}

void handler::setHandlerKernelBundle(kernel Kernel) {
  // Kernel may not have an associated kernel bundle if it is created from a
  // program. As such, apply getSyclObjImpl directly on the kernel, i.e. not
  // the other way around: getSyclObjImpl(Kernel->get_kernel_bundle()).
  std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpl =
      detail::getSyclObjImpl(Kernel)->get_kernel_bundle();
  setHandlerKernelBundle(KernelBundleImpl);
}

event handler::finalize() {
  // This block of code is needed only for reduction implementation.
  // It is harmless (does nothing) for everything else.
  if (MIsFinalized)
    return MLastEvent;
  MIsFinalized = true;

  // According to 4.7.6.9 of SYCL2020 spec, if a placeholder accessor is passed
  // to a command without being bound to a command group, an exception should
  // be thrown.
  {
    for (const auto &arg : MArgs) {
      if (arg.MType != detail::kernel_param_kind_t::kind_accessor)
        continue;

      detail::Requirement *AccImpl =
          static_cast<detail::Requirement *>(arg.MPtr);
      if (AccImpl->MIsPlaceH) {
        auto It = std::find(CGData.MRequirements.begin(),
                            CGData.MRequirements.end(), AccImpl);
        if (It == CGData.MRequirements.end())
          throw sycl::exception(make_error_code(errc::kernel_argument),
                                "placeholder accessor must be bound by calling "
                                "handler::require() before it can be used.");

        // Check associated accessors
        bool AccFound = false;
        for (detail::ArgDesc &Acc : MAssociatedAccesors) {
          if (Acc.MType == detail::kernel_param_kind_t::kind_accessor &&
              static_cast<detail::Requirement *>(Acc.MPtr) == AccImpl) {
            AccFound = true;
            break;
          }
        }

        if (!AccFound) {
          throw sycl::exception(make_error_code(errc::kernel_argument),
                                "placeholder accessor must be bound by calling "
                                "handler::require() before it can be used.");
        }
      }
    }
  }

  const auto &type = getType();
  if (type == detail::CG::Kernel) {
    // If there were uses of set_specialization_constant build the kernel_bundle
    std::shared_ptr<detail::kernel_bundle_impl> KernelBundleImpPtr =
        getOrInsertHandlerKernelBundle(/*Insert=*/false);
    if (KernelBundleImpPtr) {
      // Make sure implicit non-interop kernel bundles have the kernel
      if (!KernelBundleImpPtr->isInterop() &&
          !MImpl->isStateExplicitKernelBundle()) {
        auto Dev = MGraph ? MGraph->getDevice() : MQueue->get_device();
        kernel_id KernelID =
            detail::ProgramManager::getInstance().getSYCLKernelID(
                MKernelName.c_str());
        bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
        // If the kernel was not inserted and the bundle is in input mode, we
        // try building it and then look for the kernel in executable mode.
        if (!KernelInserted &&
            KernelBundleImpPtr->get_bundle_state() == bundle_state::input) {
          auto KernelBundle =
              detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
                  KernelBundleImpPtr);
          kernel_bundle<bundle_state::executable> ExecKernelBundle =
              build(KernelBundle);
          KernelBundleImpPtr = detail::getSyclObjImpl(ExecKernelBundle);
          setHandlerKernelBundle(KernelBundleImpPtr);
          KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
        }
        // If the kernel was not found in executable mode we throw an exception
        if (!KernelInserted)
          throw sycl::exception(make_error_code(errc::runtime),
                                "Failed to add kernel to kernel bundle.");
      }

      switch (KernelBundleImpPtr->get_bundle_state()) {
      case bundle_state::input: {
        // Underlying level expects kernel_bundle to be in executable state
        kernel_bundle<bundle_state::executable> ExecBundle = build(
            detail::createSyclObjFromImpl<kernel_bundle<bundle_state::input>>(
                KernelBundleImpPtr));
        KernelBundleImpPtr = detail::getSyclObjImpl(ExecBundle);
        setHandlerKernelBundle(KernelBundleImpPtr);
        break;
      }
      case bundle_state::executable:
        // Nothing to do
        break;
      case bundle_state::object:
      case bundle_state::ext_oneapi_source:
        assert(0 && "Expected that the bundle is either in input or executable "
                    "states.");
        break;
      }
    }

    if (MQueue && !MGraph && !MSubgraphNode && !MQueue->getCommandGraph() &&
        !MQueue->is_in_fusion_mode() && !CGData.MRequirements.size() &&
        !MStreamStorage.size() &&
        (!CGData.MEvents.size() ||
         (MQueue->isInOrder() &&
          detail::Scheduler::areEventsSafeForSchedulerBypass(
              CGData.MEvents, MQueue->getContextImplPtr())))) {
      // If the user does not add a new dependency to the dependency graph,
      // i.e. the graph is not changed, and the queue is not in fusion mode,
      // then this faster path is used to submit the kernel, bypassing the
      // scheduler and avoiding CommandGroup and Command object creation.

      std::vector<sycl::detail::pi::PiEvent> RawEvents;
      detail::EventImplPtr NewEvent;

#ifdef XPTI_ENABLE_INSTRUMENTATION
      // uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent,
      int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME);
      auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
          StreamID, MKernel, MCodeLoc, MKernelName.c_str(), MQueue, MNDRDesc,
          KernelBundleImpPtr, MArgs);
      auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent,
                            InstanceID = InstanceID]() {
#else
      auto EnqueueKernel = [&]() {
#endif
        // 'Result' for single point of return
        pi_int32 Result = PI_ERROR_INVALID_VALUE;
#ifdef XPTI_ENABLE_INSTRUMENTATION
        detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
                                           xpti::trace_task_begin, nullptr);
#endif
        if (MQueue->is_host()) {
          MHostKernel->call(MNDRDesc, (NewEvent)
                                          ? NewEvent->getHostProfilingInfo()
                                          : nullptr);
          Result = PI_SUCCESS;
        } else {
          if (MQueue->getDeviceImplPtr()->getBackend() ==
              backend::ext_intel_esimd_emulator) {
            // Capture the host timestamp for profiling (queue time)
            if (NewEvent != nullptr)
              NewEvent->setHostEnqueueTime();
            [&](auto... Args) {
              if (MImpl->MKernelIsCooperative) {
                MQueue->getPlugin()
                    ->call<
                        detail::PiApiKind::piextEnqueueCooperativeKernelLaunch>(
                        Args...);
              } else {
                MQueue->getPlugin()
                    ->call<detail::PiApiKind::piEnqueueKernelLaunch>(Args...);
              }
            }(/* queue */
              nullptr,
              /* kernel */
              reinterpret_cast<pi_kernel>(MHostKernel->getPtr()),
              /* work_dim */
              MNDRDesc.Dims,
              /* global_work_offset */ &MNDRDesc.GlobalOffset[0],
              /* global_work_size */ &MNDRDesc.GlobalSize[0],
              /* local_work_size */ &MNDRDesc.LocalSize[0],
              /* num_events_in_wait_list */ 0,
              /* event_wait_list */ nullptr,
              /* event */ nullptr);
            Result = PI_SUCCESS;
          } else {
            Result = enqueueImpKernel(
                MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel,
                MKernelName.c_str(), RawEvents, NewEvent, nullptr,
                MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative);
          }
        }
#ifdef XPTI_ENABLE_INSTRUMENTATION
        // Emit signal only when event is created
        if (NewEvent != nullptr) {
          detail::emitInstrumentationGeneral(
              StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
              static_cast<const void *>(NewEvent->getHandleRef()));
        }
        detail::emitInstrumentationGeneral(StreamID, InstanceID, CmdTraceEvent,
                                           xpti::trace_task_end, nullptr);
#endif
        return Result;
      };

      bool DiscardEvent = false;
      if (MQueue->supportsDiscardingPiEvents()) {
        // The kernel only uses assert if it is a non-interop one
        bool KernelUsesAssert =
            !(MKernel && MKernel->isInterop()) &&
            detail::ProgramManager::getInstance().kernelUsesAssert(
                MKernelName.c_str());
        DiscardEvent = !KernelUsesAssert;
      }

      if (DiscardEvent) {
        if (PI_SUCCESS != EnqueueKernel())
          throw runtime_error("Enqueue process failed.",
                              PI_ERROR_INVALID_OPERATION);
      } else {
        NewEvent = std::make_shared<detail::event_impl>(MQueue);
        NewEvent->setWorkerQueue(MQueue);
        NewEvent->setContextImpl(MQueue->getContextImplPtr());
        NewEvent->setStateIncomplete();
        NewEvent->setSubmissionTime();

        if (PI_SUCCESS != EnqueueKernel())
          throw runtime_error("Enqueue process failed.",
                              PI_ERROR_INVALID_OPERATION);
        else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr)
          NewEvent->setComplete();

        MLastEvent = detail::createSyclObjFromImpl<event>(NewEvent);
      }
      return MLastEvent;
    }
  }

  std::unique_ptr<detail::CG> CommandGroup;
  switch (type) {
  case detail::CG::Kernel: {
    // Copy the kernel name here instead of moving it so that it is still
    // available after this method runs, for use by the reductions
    // implementation. This allows the assert feature to check whether the
    // kernel uses assertions.
    CommandGroup.reset(new detail::CGExecKernel(
        std::move(MNDRDesc), std::move(MHostKernel), std::move(MKernel),
        std::move(MImpl->MKernelBundle), std::move(CGData), std::move(MArgs),
        MKernelName.c_str(), std::move(MStreamStorage),
        std::move(MImpl->MAuxiliaryResources), MCGType,
        MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative, MCodeLoc));
    break;
  }
  case detail::CG::CopyAccToPtr:
  case detail::CG::CopyPtrToAcc:
  case detail::CG::CopyAccToAcc:
    CommandGroup.reset(
        new detail::CGCopy(MCGType, MSrcPtr, MDstPtr, std::move(CGData),
                           std::move(MImpl->MAuxiliaryResources), MCodeLoc));
    break;
  case detail::CG::Fill:
    CommandGroup.reset(new detail::CGFill(std::move(MPattern), MDstPtr,
                                          std::move(CGData), MCodeLoc));
    break;
  case detail::CG::UpdateHost:
    CommandGroup.reset(
        new detail::CGUpdateHost(MDstPtr, std::move(CGData), MCodeLoc));
    break;
  case detail::CG::CopyUSM:
    CommandGroup.reset(new detail::CGCopyUSM(MSrcPtr, MDstPtr, MLength,
                                             std::move(CGData), MCodeLoc));
    break;
  case detail::CG::FillUSM:
    CommandGroup.reset(new detail::CGFillUSM(
        std::move(MPattern), MDstPtr, MLength, std::move(CGData), MCodeLoc));
    break;
  case detail::CG::PrefetchUSM:
    CommandGroup.reset(new detail::CGPrefetchUSM(MDstPtr, MLength,
                                                 std::move(CGData), MCodeLoc));
    break;
  case detail::CG::AdviseUSM:
    CommandGroup.reset(new detail::CGAdviseUSM(MDstPtr, MLength, MImpl->MAdvice,
                                               std::move(CGData), MCGType,
                                               MCodeLoc));
    break;
  case detail::CG::Copy2DUSM:
    CommandGroup.reset(new detail::CGCopy2DUSM(
        MSrcPtr, MDstPtr, MImpl->MSrcPitch, MImpl->MDstPitch, MImpl->MWidth,
        MImpl->MHeight, std::move(CGData), MCodeLoc));
    break;
  case detail::CG::Fill2DUSM:
    CommandGroup.reset(new detail::CGFill2DUSM(
        std::move(MPattern), MDstPtr, MImpl->MDstPitch, MImpl->MWidth,
        MImpl->MHeight, std::move(CGData), MCodeLoc));
    break;
  case detail::CG::Memset2DUSM:
    CommandGroup.reset(new detail::CGMemset2DUSM(
        MPattern[0], MDstPtr, MImpl->MDstPitch, MImpl->MWidth, MImpl->MHeight,
        std::move(CGData), MCodeLoc));
    break;
  case detail::CG::CodeplayHostTask: {
    auto context = MGraph ? detail::getSyclObjImpl(MGraph->getContext())
                          : MQueue->getContextImplPtr();
    CommandGroup.reset(new detail::CGHostTask(
        std::move(MHostTask), MQueue, context, std::move(MArgs),
        std::move(CGData), MCGType, MCodeLoc));
    break;
  }
  case detail::CG::Barrier:
  case detail::CG::BarrierWaitlist: {
    if (auto GraphImpl = getCommandGraph(); GraphImpl != nullptr) {
      // If no event to wait for was specified, we add all exit
      // nodes/events of the graph.
      if (MEventsWaitWithBarrier.size() == 0) {
        MEventsWaitWithBarrier = GraphImpl->getExitNodesEvents();
        // Graph-wide barriers take precedence over previous ones.
        // We therefore remove the previous ones from the ExtraDependencies
        // list. The current barrier is then added to this list in the
        // graph_impl.
        std::vector<detail::EventImplPtr> EventsBarriers =
            GraphImpl->removeBarriersFromExtraDependencies();
        MEventsWaitWithBarrier.insert(std::end(MEventsWaitWithBarrier),
                                      std::begin(EventsBarriers),
                                      std::end(EventsBarriers));
      }
      CGData.MEvents.insert(std::end(CGData.MEvents),
                            std::begin(MEventsWaitWithBarrier),
                            std::end(MEventsWaitWithBarrier));
      // The barrier node is implemented as an empty node in the graph,
      // but the barrier type is kept to help manage dependencies.
      MCGType = detail::CG::Barrier;
      CommandGroup.reset(
          new detail::CG(detail::CG::Barrier, std::move(CGData), MCodeLoc));
    } else {
      CommandGroup.reset(
          new detail::CGBarrier(std::move(MEventsWaitWithBarrier),
                                std::move(CGData), MCGType, MCodeLoc));
    }
    break;
  }
  case detail::CG::CopyToDeviceGlobal: {
    CommandGroup.reset(new detail::CGCopyToDeviceGlobal(
        MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
        std::move(CGData), MCodeLoc));
    break;
  }
  case detail::CG::CopyFromDeviceGlobal: {
    CommandGroup.reset(new detail::CGCopyFromDeviceGlobal(
        MSrcPtr, MDstPtr, MImpl->MIsDeviceImageScoped, MLength, MImpl->MOffset,
        std::move(CGData), MCodeLoc));
    break;
  }
  case detail::CG::ReadWriteHostPipe: {
    CommandGroup.reset(new detail::CGReadWriteHostPipe(
        MImpl->HostPipeName, MImpl->HostPipeBlocking, MImpl->HostPipePtr,
        MImpl->HostPipeTypeSize, MImpl->HostPipeRead, std::move(CGData),
        MCodeLoc));
    break;
  }
  case detail::CG::ExecCommandBuffer: {
    std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> ParentGraph =
        MQueue ? MQueue->getCommandGraph() : MGraph;

    // If a parent graph is set, we are adding or recording a subgraph and we
    // do not want to actually execute this command-graph submission.
    if (ParentGraph) {
      ext::oneapi::experimental::detail::graph_impl::WriteLock ParentLock;
      if (MQueue) {
        ParentLock = ext::oneapi::experimental::detail::graph_impl::WriteLock(
            ParentGraph->MMutex);
      }
      CGData.MRequirements = MExecGraph->getRequirements();
      // Here we are using the CommandGroup without passing a CommandBuffer to
      // pass the exec_graph_impl and event dependencies. Since this subgraph CG
      // will not be executed this is fine.
      CommandGroup.reset(new sycl::detail::CGExecCommandBuffer(
          nullptr, MExecGraph, std::move(CGData)));

    } else {
      event GraphCompletionEvent =
          MExecGraph->enqueue(MQueue, std::move(CGData));
      MLastEvent = GraphCompletionEvent;
      return MLastEvent;
    }
  } break;
  case detail::CG::CopyImage:
    CommandGroup.reset(new detail::CGCopyImage(
        MSrcPtr, MDstPtr, MImpl->MImageDesc, MImpl->MImageFormat,
        MImpl->MImageCopyFlags, MImpl->MSrcOffset, MImpl->MDestOffset,
        MImpl->MHostExtent, MImpl->MCopyExtent, std::move(CGData), MCodeLoc));
    break;
  case detail::CG::SemaphoreWait:
    CommandGroup.reset(new detail::CGSemaphoreWait(
        MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
    break;
  case detail::CG::SemaphoreSignal:
    CommandGroup.reset(new detail::CGSemaphoreSignal(
        MImpl->MInteropSemaphoreHandle, std::move(CGData), MCodeLoc));
    break;
  case detail::CG::None:
    if (detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL)) {
      std::cout << "WARNING: An empty command group is submitted." << std::endl;
    }

    // Empty nodes are handled by the graph like standard nodes.
    // For standard (non-graph) mode, empty nodes are not sent to the
    // scheduler, to save time.
    if (MGraph || (MQueue && MQueue->getCommandGraph())) {
      CommandGroup.reset(
          new detail::CG(detail::CG::None, std::move(CGData), MCodeLoc));
    } else {
      detail::EventImplPtr Event = std::make_shared<sycl::detail::event_impl>();
      MLastEvent = detail::createSyclObjFromImpl<event>(Event);
      return MLastEvent;
    }
    break;
  }

  if (!CommandGroup)
    throw sycl::runtime_error(
        "Internal Error. Command group cannot be constructed.",
        PI_ERROR_INVALID_OPERATION);

  // If there is a graph associated with the handler we are in the explicit
  // graph mode, so we store the CG instead of submitting it to the scheduler,
  // so it can be retrieved by the graph later.
  if (MGraph) {
    MGraphNodeCG = std::move(CommandGroup);
    return detail::createSyclObjFromImpl<event>(
        std::make_shared<detail::event_impl>());
  }

  // If the queue has an associated graph then we need to take the CG and pass
  // it to the graph to create a node, rather than submit it to the scheduler.
  if (auto GraphImpl = MQueue->getCommandGraph(); GraphImpl) {
    auto EventImpl = std::make_shared<detail::event_impl>();
    std::shared_ptr<ext::oneapi::experimental::detail::node_impl> NodeImpl =
        nullptr;

    // GraphImpl is read and written in this scope so we lock this graph
    // with full privileges.
    ext::oneapi::experimental::detail::graph_impl::WriteLock Lock(
        GraphImpl->MMutex);

    ext::oneapi::experimental::node_type NodeType =
        MImpl->MUserFacingNodeType !=
                ext::oneapi::experimental::node_type::empty
            ? MImpl->MUserFacingNodeType
            : ext::oneapi::experimental::detail::getNodeTypeFromCG(MCGType);

    // Create a new node in the graph representing this command-group
    if (MQueue->isInOrder()) {
      // In-order queues create implicit linear dependencies between nodes.
      // Find the last node added to the graph from this queue, so our new
      // node can set it as a predecessor.
      auto DependentNode = GraphImpl->getLastInorderNode(MQueue);

      NodeImpl = DependentNode
                     ? GraphImpl->add(NodeType, std::move(CommandGroup),
                                      {DependentNode})
                     : GraphImpl->add(NodeType, std::move(CommandGroup));

      // If we are recording an in-order queue remember the new node, so it
      // can be used as a dependency for any more nodes recorded from this
      // queue.
      GraphImpl->setLastInorderNode(MQueue, NodeImpl);
    } else {
      NodeImpl = GraphImpl->add(NodeType, std::move(CommandGroup));
    }

    // Associate an event with this new node and return the event.
    GraphImpl->addEventForNode(GraphImpl, EventImpl, NodeImpl);

    NodeImpl->MNDRangeUsed = MImpl->MNDRangeUsed;

    return detail::createSyclObjFromImpl<event>(EventImpl);
  }

  detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG(
      std::move(CommandGroup), std::move(MQueue));

  MLastEvent = detail::createSyclObjFromImpl<event>(Event);
  return MLastEvent;
}
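
// Illustrative usage sketch (not part of the original file): the placeholder
// rule checked at the top of finalize(). Per SYCL 2020 section 4.7.6.9, a
// placeholder accessor must be bound with handler::require() before use;
// without the require() call, finalize() throws errc::kernel_argument.
//
//   sycl::queue Q;
//   sycl::buffer<int, 1> Buf{sycl::range<1>{16}};
//   sycl::accessor<int, 1, sycl::access_mode::read_write,
//                  sycl::target::device, sycl::access::placeholder::true_t>
//       Acc{Buf};
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.require(Acc); // bind the placeholder to this command group
//     CGH.parallel_for(sycl::range<1>{16},
//                      [=](sycl::id<1> I) { Acc[I] = 0; });
//   });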

void handler::addReduction(const std::shared_ptr<const void> &ReduObj) {
  MImpl->MAuxiliaryResources.push_back(ReduObj);
}

void handler::associateWithHandlerCommon(detail::AccessorImplPtr AccImpl,
                                         int AccTarget) {
  if (getCommandGraph() &&
      static_cast<detail::SYCLMemObjT *>(AccImpl->MSYCLMemObj)
          ->needsWriteBack()) {
    throw sycl::exception(make_error_code(errc::invalid),
                          "Accessors to buffers which have write_back enabled "
                          "are not allowed to be used in command graphs.");
  }
  detail::Requirement *Req = AccImpl.get();
  if (Req->MAccessMode != sycl::access_mode::read) {
    auto SYCLMemObj = static_cast<detail::SYCLMemObjT *>(Req->MSYCLMemObj);
    SYCLMemObj->handleWriteAccessorCreation();
  }
  // Add the accessor to the list of requirements.
  if (Req->MAccessRange.size() != 0)
    CGData.MRequirements.push_back(Req);
  // Store a copy of the accessor.
  CGData.MAccStorage.push_back(std::move(AccImpl));
  // Add the accessor to the handler's list of associated accessors.
  // For associated accessors the index is meaningless, so pass 0.
  MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor,
                                   Req, AccTarget, /*index*/ 0);
}

void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
                                   access::target AccTarget) {
  associateWithHandlerCommon(detail::getSyclObjImpl(*AccBase),
                             static_cast<int>(AccTarget));
}

void handler::associateWithHandler(
    detail::UnsampledImageAccessorBaseHost *AccBase, image_target AccTarget) {
  associateWithHandlerCommon(detail::getSyclObjImpl(*AccBase),
                             static_cast<int>(AccTarget));
}

void handler::associateWithHandler(
    detail::SampledImageAccessorBaseHost *AccBase, image_target AccTarget) {
  associateWithHandlerCommon(detail::getSyclObjImpl(*AccBase),
                             static_cast<int>(AccTarget));
}

static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index,
                                     size_t &IndexShift, int Size,
                                     bool IsKernelCreatedFromSource,
                                     size_t GlobalSize,
                                     std::vector<detail::ArgDesc> &Args,
                                     bool isESIMD) {
  using detail::kernel_param_kind_t;
  if (AccImpl->PerWI)
    AccImpl->resize(GlobalSize);

  Args.emplace_back(kernel_param_kind_t::kind_accessor, AccImpl, Size,
                    Index + IndexShift);

  // TODO ESIMD currently does not support offset, memory and access ranges -
  // accessor::init for an ESIMD-mode accessor has a single field, translated
  // to the single kernel argument set above.
  if (!isESIMD && !IsKernelCreatedFromSource) {
    // Dimensionality of the buffer is 1 when dimensionality of the
    // accessor is 0.
    const size_t SizeAccField =
        sizeof(size_t) * (AccImpl->MDims == 0 ? 1 : AccImpl->MDims);
    ++IndexShift;
    Args.emplace_back(kernel_param_kind_t::kind_std_layout,
                      &AccImpl->MAccessRange[0], SizeAccField,
                      Index + IndexShift);
    ++IndexShift;
    Args.emplace_back(kernel_param_kind_t::kind_std_layout,
                      &AccImpl->MMemoryRange[0], SizeAccField,
                      Index + IndexShift);
    ++IndexShift;
    Args.emplace_back(kernel_param_kind_t::kind_std_layout,
                      &AccImpl->MOffset[0], SizeAccField, Index + IndexShift);
  }
}
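
// Descriptive note (assumption-based sketch, not in the original file): for a
// non-ESIMD kernel that is not created from source, a single 1-D global
// accessor therefore expands to four kernel arguments at consecutive indices:
//
//   Args[Index + 0]  the accessor itself  (kind_accessor)
//   Args[Index + 1]  MAccessRange         (kind_std_layout, sizeof(size_t))
//   Args[Index + 2]  MMemoryRange         (kind_std_layout, sizeof(size_t))
//   Args[Index + 3]  MOffset              (kind_std_layout, sizeof(size_t))
//
// IndexShift accumulates across arguments so that later arguments keep
// unique, non-overlapping indices.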

void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
                         const int Size, const size_t Index, size_t &IndexShift,
                         bool IsKernelCreatedFromSource, bool IsESIMD) {
  using detail::kernel_param_kind_t;

  switch (Kind) {
  case kernel_param_kind_t::kind_std_layout:
  case kernel_param_kind_t::kind_pointer: {
    MArgs.emplace_back(Kind, Ptr, Size, Index + IndexShift);
    break;
  }
  case kernel_param_kind_t::kind_stream: {
    // A stream contains several accessors inside.
    stream *S = static_cast<stream *>(Ptr);

    detail::AccessorBaseHost *GBufBase =
        static_cast<detail::AccessorBaseHost *>(&S->GlobalBuf);
    detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase);
    detail::Requirement *GBufReq = GBufImpl.get();
    addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size,
                             IsKernelCreatedFromSource,
                             MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
    ++IndexShift;
    detail::AccessorBaseHost *GOffsetBase =
        static_cast<detail::AccessorBaseHost *>(&S->GlobalOffset);
    detail::AccessorImplPtr GOffsetImpl = detail::getSyclObjImpl(*GOffsetBase);
    detail::Requirement *GOffsetReq = GOffsetImpl.get();
    addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size,
                             IsKernelCreatedFromSource,
                             MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
    ++IndexShift;
    detail::AccessorBaseHost *GFlushBase =
        static_cast<detail::AccessorBaseHost *>(&S->GlobalFlushBuf);
    detail::AccessorImplPtr GFlushImpl = detail::getSyclObjImpl(*GFlushBase);
    detail::Requirement *GFlushReq = GFlushImpl.get();

    size_t GlobalSize = MNDRDesc.GlobalSize.size();
    // If the work-group size was not set explicitly, then it must be received
    // from a kernel attribute or set to the default values. For now we cannot
    // get this attribute here, so we just assume that the work-group size is
    // always the default for streams.
    // TODO adjust MNDRDesc when the device image contains the kernel attribute
    if (GlobalSize == 0) {
      // Assume that the work-group size is 1 for every dimension
      GlobalSize = MNDRDesc.NumWorkGroups.size();
    }
    addArgsForGlobalAccessor(GFlushReq, Index, IndexShift, Size,
                             IsKernelCreatedFromSource, GlobalSize, MArgs,
                             IsESIMD);
    ++IndexShift;
    MArgs.emplace_back(kernel_param_kind_t::kind_std_layout,
                       &S->FlushBufferSize, sizeof(S->FlushBufferSize),
                       Index + IndexShift);

    break;
  }
  case kernel_param_kind_t::kind_accessor: {
    // For accessor arguments, Size carries information about the accessor.
    // The first 11 bits of Size encode the accessor target.
    const access::target AccTarget =
        static_cast<access::target>(Size & AccessTargetMask);
    switch (AccTarget) {
    case access::target::device:
    case access::target::constant_buffer: {
      detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
      addArgsForGlobalAccessor(AccImpl, Index, IndexShift, Size,
                               IsKernelCreatedFromSource,
                               MNDRDesc.GlobalSize.size(), MArgs, IsESIMD);
      break;
    }
    case access::target::local: {
      detail::LocalAccessorImplHost *LAcc =
          static_cast<detail::LocalAccessorImplHost *>(Ptr);

      range<3> &Size = LAcc->MSize;
      const int Dims = LAcc->MDims;
      int SizeInBytes = LAcc->MElemSize;
      for (int I = 0; I < Dims; ++I)
        SizeInBytes *= Size[I];
      // Some backends do not accept zero-sized local memory arguments, so we
      // make it a minimum allocation of 1 byte.
      SizeInBytes = std::max(SizeInBytes, 1);
      MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr,
                         SizeInBytes, Index + IndexShift);
      // TODO ESIMD currently does not support the MSize field passing yet;
      // accessor::init for an ESIMD-mode accessor has a single field,
      // translated to the single kernel argument set above.
      if (!IsESIMD && !IsKernelCreatedFromSource) {
        ++IndexShift;
        const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(Size[0]);
        MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
                           SizeAccField, Index + IndexShift);
        ++IndexShift;
        MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
                           SizeAccField, Index + IndexShift);
        ++IndexShift;
        MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, &Size,
                           SizeAccField, Index + IndexShift);
      }
      break;
    }
    case access::target::image:
    case access::target::image_array: {
      detail::Requirement *AccImpl = static_cast<detail::Requirement *>(Ptr);
      MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
      if (!IsKernelCreatedFromSource) {
        // TODO Handle additional kernel arguments for image class
        // if the compiler front-end adds them.
      }
      break;
    }
    case access::target::host_image:
    case access::target::host_buffer: {
      throw sycl::invalid_parameter_error("Unsupported accessor target case.",
                                          PI_ERROR_INVALID_OPERATION);
      break;
    }
    }
    break;
  }
  case kernel_param_kind_t::kind_sampler: {
    MArgs.emplace_back(kernel_param_kind_t::kind_sampler, Ptr, sizeof(sampler),
                       Index + IndexShift);
    break;
  }
  case kernel_param_kind_t::kind_specialization_constants_buffer: {
    MArgs.emplace_back(
        kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size,
        Index + IndexShift);
    break;
  }
  case kernel_param_kind_t::kind_invalid:
    throw runtime_error("Invalid kernel param kind", PI_ERROR_INVALID_VALUE);
    break;
  }
}

// An argument can take up more space to store additional information about
// MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor.
// We use the worst-case estimate because the lifetime of the vector is short.
// In processArg the kind_stream case introduces the maximum number of
// additional arguments: it adds 12 arguments on top of the currently
// processed one, hence the worst-case estimate is 12 + 1 = 13.
// TODO: the constant can be removed if the size of MArgs is calculated at
// compile time.
inline constexpr size_t MaxNumAdditionalArgs = 13;

void handler::extractArgsAndReqs() {
  assert(MKernel && "MKernel is not initialized");
  std::vector<detail::ArgDesc> UnPreparedArgs = std::move(MArgs);
  MArgs.clear();

  std::sort(
      UnPreparedArgs.begin(), UnPreparedArgs.end(),
      [](const detail::ArgDesc &first, const detail::ArgDesc &second) -> bool {
        return (first.MIndex < second.MIndex);
      });

  const bool IsKernelCreatedFromSource = MKernel->isCreatedFromSource();
  MArgs.reserve(MaxNumAdditionalArgs * UnPreparedArgs.size());

  size_t IndexShift = 0;
  for (size_t I = 0; I < UnPreparedArgs.size(); ++I) {
    void *Ptr = UnPreparedArgs[I].MPtr;
    const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType;
    const int &Size = UnPreparedArgs[I].MSize;
    const int Index = UnPreparedArgs[I].MIndex;
    processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource,
               false);
  }
}

void handler::extractArgsAndReqsFromLambda(
    char *LambdaPtr, size_t KernelArgsNum,
    const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) {
  const bool IsKernelCreatedFromSource = false;
  size_t IndexShift = 0;
  MArgs.reserve(MaxNumAdditionalArgs * KernelArgsNum);

  for (size_t I = 0; I < KernelArgsNum; ++I) {
    void *Ptr = LambdaPtr + KernelArgs[I].offset;
    const detail::kernel_param_kind_t &Kind = KernelArgs[I].kind;
    const int &Size = KernelArgs[I].info;
    if (Kind == detail::kernel_param_kind_t::kind_accessor) {
      // For accessor arguments, Size carries information about the accessor.
      // The first 11 bits of Size encode the accessor target.
      const access::target AccTarget =
          static_cast<access::target>(Size & AccessTargetMask);
      if ((AccTarget == access::target::device ||
           AccTarget == access::target::constant_buffer) ||
          (AccTarget == access::target::image ||
           AccTarget == access::target::image_array)) {
        detail::AccessorBaseHost *AccBase =
            static_cast<detail::AccessorBaseHost *>(Ptr);
        Ptr = detail::getSyclObjImpl(*AccBase).get();
      } else if (AccTarget == access::target::local) {
        detail::LocalAccessorBaseHost *LocalAccBase =
            static_cast<detail::LocalAccessorBaseHost *>(Ptr);
        Ptr = detail::getSyclObjImpl(*LocalAccBase).get();
      }
    }
    processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource,
               IsESIMD);
  }
}

// Calling methods of kernel_impl requires knowledge of the class layout.
// As this is impossible in a header, this function calls the necessary
// method inside the library and returns the result.
detail::string handler::getKernelName() {
  return detail::string{MKernel->get_info<info::kernel::function_name>()};
}

void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) {
  auto UsedKernelBundleImplPtr =
      getOrInsertHandlerKernelBundle(/*Insert=*/false);
  if (!UsedKernelBundleImplPtr)
    return;

  // Implicit kernel bundles are populated late so we ignore them
  if (!MImpl->isStateExplicitKernelBundle())
    return;

  kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
  device Dev =
      MGraph ? MGraph->getDevice() : detail::getDeviceFromHandler(*this);
  if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev))
    throw sycl::exception(
        make_error_code(errc::kernel_not_supported),
        "The kernel bundle in use does not contain the kernel");
}

void handler::ext_oneapi_barrier(const std::vector<event> &WaitList) {
  throwIfActionIsCreated();
  MCGType = detail::CG::BarrierWaitlist;
  MEventsWaitWithBarrier.resize(WaitList.size());
  std::transform(
      WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(),
      [](const event &Event) { return detail::getSyclObjImpl(Event); });
}
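
// Illustrative usage sketch (not part of the original file): enqueueing a
// barrier that waits on an explicit set of events through the public queue
// and handler API.
//
//   sycl::queue Q1, Q2;
//   sycl::event E1 = Q1.submit(
//       [&](sycl::handler &CGH) { CGH.single_task([] { /* work */ }); });
//   sycl::event E2 = Q2.submit(
//       [&](sycl::handler &CGH) { CGH.single_task([] { /* work */ }); });
//   Q1.submit([&](sycl::handler &CGH) {
//     CGH.ext_oneapi_barrier({E1, E2}); // later Q1 commands wait on E1, E2
//   });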

using namespace sycl::detail;
bool handler::DisableRangeRounding() {
  return SYCLConfig<SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING>::get();
}

bool handler::RangeRoundingTrace() {
  return SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE>::get();
}

void handler::GetRangeRoundingSettings(size_t &MinFactor, size_t &GoodFactor,
                                       size_t &MinRange) {
  SYCLConfig<SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS>::GetSettings(
      MinFactor, GoodFactor, MinRange);
}

void handler::memcpy(void *Dest, const void *Src, size_t Count) {
  throwIfActionIsCreated();
  MSrcPtr = const_cast<void *>(Src);
  MDstPtr = Dest;
  MLength = Count;
  setType(detail::CG::CopyUSM);
}
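
// Illustrative usage sketch (not part of the original file): handler::memcpy
// as reached from user code. Note that Count is a byte count, not an element
// count.
//
//   sycl::queue Q;
//   constexpr size_t N = 1024;
//   int *Dst = sycl::malloc_device<int>(N, Q);
//   std::vector<int> Src(N, 42);
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.memcpy(Dst, Src.data(), N * sizeof(int)); // bytes, not elements
//   }).wait();
//   sycl::free(Dst, Q);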

void handler::memset(void *Dest, int Value, size_t Count) {
  throwIfActionIsCreated();
  MDstPtr = Dest;
  MPattern.push_back(static_cast<char>(Value));
  MLength = Count;
  setUserFacingNodeType(ext::oneapi::experimental::node_type::memset);
  setType(detail::CG::FillUSM);
}

void handler::prefetch(const void *Ptr, size_t Count) {
  throwIfActionIsCreated();
  MDstPtr = const_cast<void *>(Ptr);
  MLength = Count;
  setType(detail::CG::PrefetchUSM);
}

void handler::mem_advise(const void *Ptr, size_t Count, int Advice) {
  throwIfActionIsCreated();
  MDstPtr = const_cast<void *>(Ptr);
  MLength = Count;
  MImpl->MAdvice = static_cast<pi_mem_advice>(Advice);
  setType(detail::CG::AdviseUSM);
}

void handler::ext_oneapi_memcpy2d_impl(void *Dest, size_t DestPitch,
                                       const void *Src, size_t SrcPitch,
                                       size_t Width, size_t Height) {
  // Checks done in callers.
  MSrcPtr = const_cast<void *>(Src);
  MDstPtr = Dest;
  MImpl->MSrcPitch = SrcPitch;
  MImpl->MDstPitch = DestPitch;
  MImpl->MWidth = Width;
  MImpl->MHeight = Height;
  setType(detail::CG::Copy2DUSM);
}
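
// Descriptive note (assumption-based sketch, not in the original file): the
// 2D parameters above follow the usual pitched-memory convention, where a
// pitch is the byte distance between the starts of consecutive rows of a
// possibly wider allocation:
//
//   row r of Src  starts at (const char *)Src + r * SrcPitch
//   row r of Dest starts at (char *)Dest      + r * DestPitch
//   Width bytes are copied per row, for Height rows.
//
// Per the "Checks done in callers." comment, the public 2D entry points are
// expected to validate the pitches (e.g. DestPitch >= Width) before calling
// this implementation.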

void handler::ext_oneapi_fill2d_impl(void *Dest, size_t DestPitch,
                                     const void *Value, size_t ValueSize,
                                     size_t Width, size_t Height) {
  // Checks done in callers.
  MDstPtr = Dest;
  MPattern.resize(ValueSize);
  std::memcpy(MPattern.data(), Value, ValueSize);
  MImpl->MDstPitch = DestPitch;
  MImpl->MWidth = Width;
  MImpl->MHeight = Height;
  setType(detail::CG::Fill2DUSM);
}

void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value,
                                       size_t Width, size_t Height) {
  // Checks done in callers.
  MDstPtr = Dest;
  MPattern.push_back(static_cast<char>(Value));
  MImpl->MDstPitch = DestPitch;
  MImpl->MWidth = Width;
  MImpl->MHeight = Height;
  setType(detail::CG::Memset2DUSM);
}

void handler::ext_oneapi_copy(
    void *Src, ext::oneapi::experimental::image_mem_handle Dest,
    const ext::oneapi::experimental::image_descriptor &Desc) {
  throwIfGraphAssociated<
      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
          sycl_ext_oneapi_bindless_images>();
  Desc.verify();

  MSrcPtr = Src;
  MDstPtr = Dest.raw_handle;

  sycl::detail::pi::PiMemImageDesc PiDesc = {};
  PiDesc.image_width = Desc.width;
  PiDesc.image_height = Desc.height;
  PiDesc.image_depth = Desc.depth;
  PiDesc.image_array_size = Desc.array_size;

  if (Desc.array_size > 1) {
    // Image Array.
    PiDesc.image_type = Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
                                        : PI_MEM_TYPE_IMAGE1D_ARRAY;

    // Cubemap.
    PiDesc.image_type =
        Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
            ? PI_MEM_TYPE_IMAGE_CUBEMAP
            : PiDesc.image_type;
  } else {
    PiDesc.image_type =
        Desc.depth > 0
            ? PI_MEM_TYPE_IMAGE3D
            : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D);
  }

  sycl::detail::pi::PiMemImageFormat PiFormat = {};
  PiFormat.image_channel_data_type =
      sycl::detail::convertChannelType(Desc.channel_type);
  PiFormat.image_channel_order =
      sycl::detail::convertChannelOrder(Desc.channel_order);

  MImpl->MSrcOffset = {0, 0, 0};
  MImpl->MDestOffset = {0, 0, 0};
  MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
  MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth};
  MImpl->MImageDesc = PiDesc;
  MImpl->MImageFormat = PiFormat;
  MImpl->MImageCopyFlags =
      sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE;
  setType(detail::CG::CopyImage);
}

void handler::ext_oneapi_copy(
    void *Src, sycl::range<3> SrcOffset, sycl::range<3> SrcExtent,
    ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset,
    const ext::oneapi::experimental::image_descriptor &DestImgDesc,
    sycl::range<3> CopyExtent) {
  throwIfGraphAssociated<
      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
          sycl_ext_oneapi_bindless_images>();
  DestImgDesc.verify();

  MSrcPtr = Src;
  MDstPtr = Dest.raw_handle;

  sycl::detail::pi::PiMemImageDesc PiDesc = {};
  PiDesc.image_width = DestImgDesc.width;
  PiDesc.image_height = DestImgDesc.height;
  PiDesc.image_depth = DestImgDesc.depth;
  PiDesc.image_array_size = DestImgDesc.array_size;

  if (DestImgDesc.array_size > 1) {
    // Image Array.
    PiDesc.image_type = DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
                                               : PI_MEM_TYPE_IMAGE1D_ARRAY;

    // Cubemap.
    PiDesc.image_type =
        DestImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap
            ? PI_MEM_TYPE_IMAGE_CUBEMAP
            : PiDesc.image_type;
  } else {
    PiDesc.image_type = DestImgDesc.depth > 0
                            ? PI_MEM_TYPE_IMAGE3D
                            : (DestImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D
                                                      : PI_MEM_TYPE_IMAGE1D);
  }

  sycl::detail::pi::PiMemImageFormat PiFormat = {};
  PiFormat.image_channel_data_type =
      sycl::detail::convertChannelType(DestImgDesc.channel_type);
  PiFormat.image_channel_order =
      sycl::detail::convertChannelOrder(DestImgDesc.channel_order);

  MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
  MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
  MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
  MImpl->MHostExtent = {SrcExtent[0], SrcExtent[1], SrcExtent[2]};
  MImpl->MImageDesc = PiDesc;
  MImpl->MImageFormat = PiFormat;
  MImpl->MImageCopyFlags =
      sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_HOST_TO_DEVICE;
  setType(detail::CG::CopyImage);
}

void handler::ext_oneapi_copy(
    ext::oneapi::experimental::image_mem_handle Src, void *Dest,
    const ext::oneapi::experimental::image_descriptor &Desc) {
  throwIfGraphAssociated<
      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
          sycl_ext_oneapi_bindless_images>();
  Desc.verify();

  MSrcPtr = Src.raw_handle;
  MDstPtr = Dest;

  sycl::detail::pi::PiMemImageDesc PiDesc = {};
  PiDesc.image_width = Desc.width;
  PiDesc.image_height = Desc.height;
  PiDesc.image_depth = Desc.depth;
  PiDesc.image_array_size = Desc.array_size;

  if (Desc.array_size > 1) {
    // Image Array.
    PiDesc.image_type = Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
                                        : PI_MEM_TYPE_IMAGE1D_ARRAY;

    // Cubemap.
    PiDesc.image_type =
        Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
            ? PI_MEM_TYPE_IMAGE_CUBEMAP
            : PiDesc.image_type;
  } else {
    PiDesc.image_type =
        Desc.depth > 0
            ? PI_MEM_TYPE_IMAGE3D
            : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D);
  }

  sycl::detail::pi::PiMemImageFormat PiFormat = {};
  PiFormat.image_channel_data_type =
      sycl::detail::convertChannelType(Desc.channel_type);
  PiFormat.image_channel_order =
      sycl::detail::convertChannelOrder(Desc.channel_order);

  MImpl->MSrcOffset = {0, 0, 0};
  MImpl->MDestOffset = {0, 0, 0};
  MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
  MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth};
  MImpl->MImageDesc = PiDesc;
  MImpl->MImageFormat = PiFormat;
  MImpl->MImageCopyFlags =
      sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST;
  setType(detail::CG::CopyImage);
}

void handler::ext_oneapi_copy(
    ext::oneapi::experimental::image_mem_handle Src, sycl::range<3> SrcOffset,
    const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest,
    sycl::range<3> DestOffset, sycl::range<3> DestExtent,
    sycl::range<3> CopyExtent) {
  throwIfGraphAssociated<
      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
          sycl_ext_oneapi_bindless_images>();
  SrcImgDesc.verify();

  MSrcPtr = Src.raw_handle;
  MDstPtr = Dest;

  sycl::detail::pi::PiMemImageDesc PiDesc = {};
  PiDesc.image_width = SrcImgDesc.width;
  PiDesc.image_height = SrcImgDesc.height;
  PiDesc.image_depth = SrcImgDesc.depth;
  PiDesc.image_array_size = SrcImgDesc.array_size;

  if (SrcImgDesc.array_size > 1) {
    // Image Array.
    PiDesc.image_type = SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
                                              : PI_MEM_TYPE_IMAGE1D_ARRAY;

    // Cubemap.
    PiDesc.image_type =
        SrcImgDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap
            ? PI_MEM_TYPE_IMAGE_CUBEMAP
            : PiDesc.image_type;
  } else {
    PiDesc.image_type = SrcImgDesc.depth > 0
                            ? PI_MEM_TYPE_IMAGE3D
                            : (SrcImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D
                                                     : PI_MEM_TYPE_IMAGE1D);
  }

  sycl::detail::pi::PiMemImageFormat PiFormat = {};
  PiFormat.image_channel_data_type =
      sycl::detail::convertChannelType(SrcImgDesc.channel_type);
  PiFormat.image_channel_order =
      sycl::detail::convertChannelOrder(SrcImgDesc.channel_order);

  MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
  MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
  MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
  MImpl->MHostExtent = {DestExtent[0], DestExtent[1], DestExtent[2]};
  MImpl->MImageDesc = PiDesc;
  MImpl->MImageFormat = PiFormat;
  MImpl->MImageCopyFlags =
      sycl::detail::pi::PiImageCopyFlags::PI_IMAGE_COPY_DEVICE_TO_HOST;
  setType(detail::CG::CopyImage);
}

void handler::ext_oneapi_copy(
    void *Src, void *Dest,
    const ext::oneapi::experimental::image_descriptor &Desc, size_t Pitch) {
  throwIfGraphAssociated<
      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
          sycl_ext_oneapi_bindless_images>();
  Desc.verify();

  MSrcPtr = Src;
  MDstPtr = Dest;

  sycl::detail::pi::PiMemImageDesc PiDesc = {};
  PiDesc.image_width = Desc.width;
  PiDesc.image_height = Desc.height;
  PiDesc.image_depth = Desc.depth;
  PiDesc.image_array_size = Desc.array_size;

  if (Desc.array_size > 1) {
    // Image Array.
    PiDesc.image_type = Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
                                        : PI_MEM_TYPE_IMAGE1D_ARRAY;

    // Cubemap.
    PiDesc.image_type =
        Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
            ? PI_MEM_TYPE_IMAGE_CUBEMAP
            : PiDesc.image_type;
  } else {
    PiDesc.image_type =
        Desc.depth > 0
            ? PI_MEM_TYPE_IMAGE3D
            : (Desc.height > 0 ? PI_MEM_TYPE_IMAGE2D : PI_MEM_TYPE_IMAGE1D);
  }

  sycl::detail::pi::PiMemImageFormat PiFormat = {};
  PiFormat.image_channel_data_type =
      sycl::detail::convertChannelType(Desc.channel_type);
  PiFormat.image_channel_order =
      sycl::detail::convertChannelOrder(Desc.channel_order);

  MImpl->MSrcOffset = {0, 0, 0};
  MImpl->MDestOffset = {0, 0, 0};
  MImpl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
  MImpl->MHostExtent = {Desc.width, Desc.height, Desc.depth};
  MImpl->MImageDesc = PiDesc;
  MImpl->MImageDesc.image_row_pitch = Pitch;
  MImpl->MImageFormat = PiFormat;
  MImpl->MImageCopyFlags = detail::getPiImageCopyFlags(
      get_pointer_type(Src, MQueue->get_context()),
      get_pointer_type(Dest, MQueue->get_context()));
  setType(detail::CG::CopyImage);
}

void handler::ext_oneapi_copy(
    void *Src, sycl::range<3> SrcOffset, void *Dest, sycl::range<3> DestOffset,
    const ext::oneapi::experimental::image_descriptor &DeviceImgDesc,
    size_t DeviceRowPitch, sycl::range<3> HostExtent,
    sycl::range<3> CopyExtent) {
  throwIfGraphAssociated<
      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
          sycl_ext_oneapi_bindless_images>();
  DeviceImgDesc.verify();

  MSrcPtr = Src;
  MDstPtr = Dest;

  sycl::detail::pi::PiMemImageDesc PiDesc = {};
  PiDesc.image_width = DeviceImgDesc.width;
  PiDesc.image_height = DeviceImgDesc.height;
  PiDesc.image_depth = DeviceImgDesc.depth;
  PiDesc.image_array_size = DeviceImgDesc.array_size;

  if (DeviceImgDesc.array_size > 1) {
    // Image Array.
    PiDesc.image_type = DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D_ARRAY
                                                 : PI_MEM_TYPE_IMAGE1D_ARRAY;

    // Cubemap.
    PiDesc.image_type =
        DeviceImgDesc.type ==
                sycl::ext::oneapi::experimental::image_type::cubemap
            ? PI_MEM_TYPE_IMAGE_CUBEMAP
            : PiDesc.image_type;
  } else {
    PiDesc.image_type = DeviceImgDesc.depth > 0
                            ? PI_MEM_TYPE_IMAGE3D
                            : (DeviceImgDesc.height > 0 ? PI_MEM_TYPE_IMAGE2D
                                                        : PI_MEM_TYPE_IMAGE1D);
  }

  sycl::detail::pi::PiMemImageFormat PiFormat = {};
  PiFormat.image_channel_data_type =
      sycl::detail::convertChannelType(DeviceImgDesc.channel_type);
  PiFormat.image_channel_order =
      sycl::detail::convertChannelOrder(DeviceImgDesc.channel_order);

  MImpl->MSrcOffset = {SrcOffset[0], SrcOffset[1], SrcOffset[2]};
  MImpl->MDestOffset = {DestOffset[0], DestOffset[1], DestOffset[2]};
  MImpl->MHostExtent = {HostExtent[0], HostExtent[1], HostExtent[2]};
  MImpl->MCopyExtent = {CopyExtent[0], CopyExtent[1], CopyExtent[2]};
  MImpl->MImageDesc = PiDesc;
  MImpl->MImageDesc.image_row_pitch = DeviceRowPitch;
  MImpl->MImageFormat = PiFormat;
  MImpl->MImageCopyFlags = detail::getPiImageCopyFlags(
      get_pointer_type(Src, MQueue->get_context()),
      get_pointer_type(Dest, MQueue->get_context()));
  setType(detail::CG::CopyImage);
}

void handler::ext_oneapi_wait_external_semaphore(
    sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle) {
  throwIfGraphAssociated<
      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
          sycl_ext_oneapi_bindless_images>();
  MImpl->MInteropSemaphoreHandle =
      (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle;
  setType(detail::CG::SemaphoreWait);
}

void handler::ext_oneapi_signal_external_semaphore(
    sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle) {
  throwIfGraphAssociated<
      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
          sycl_ext_oneapi_bindless_images>();
  MImpl->MInteropSemaphoreHandle =
      (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle;
  setType(detail::CG::SemaphoreSignal);
}

void handler::use_kernel_bundle(
    const kernel_bundle<bundle_state::executable> &ExecBundle) {
  std::shared_ptr<detail::queue_impl> PrimaryQueue =
      MImpl->MSubmissionPrimaryQueue;
  if ((!MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) ||
      (MGraph && (MGraph->getContext() != ExecBundle.get_context())))
    throw sycl::exception(
        make_error_code(errc::invalid),
        "Context associated with the primary queue is different from the "
        "context associated with the kernel bundle");

  std::shared_ptr<detail::queue_impl> SecondaryQueue =
      MImpl->MSubmissionSecondaryQueue;
  if (SecondaryQueue &&
      SecondaryQueue->get_context() != ExecBundle.get_context())
    throw sycl::exception(
        make_error_code(errc::invalid),
        "Context associated with the secondary queue is different from the "
        "context associated with the kernel bundle");

  setStateExplicitKernelBundle();
  setHandlerKernelBundle(detail::getSyclObjImpl(ExecBundle));
}
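
// Illustrative usage sketch (not part of the original file): supplying an
// executable bundle built for the same context as the queue; a context
// mismatch makes the checks above throw errc::invalid.
//
//   sycl::queue Q;
//   auto Bundle = sycl::get_kernel_bundle<sycl::bundle_state::executable>(
//       Q.get_context());
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.use_kernel_bundle(Bundle);
//     CGH.single_task<class UseBundleExample>([] {});
//   });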

void handler::depends_on(event Event) {
  auto EventImpl = detail::getSyclObjImpl(Event);
  if (EventImpl->isDiscarded()) {
    throw sycl::exception(make_error_code(errc::invalid),
                          "Queue operation cannot depend on discarded event.");
  }
  if (auto Graph = getCommandGraph(); Graph) {
    auto EventGraph = EventImpl->getCommandGraph();
    if (EventGraph == nullptr) {
      throw sycl::exception(
          make_error_code(errc::invalid),
          "Graph nodes cannot depend on events from outside the graph.");
    }
    if (EventGraph != Graph) {
      throw sycl::exception(
          make_error_code(errc::invalid),
          "Graph nodes cannot depend on events from another graph.");
    }
  }
  CGData.MEvents.push_back(EventImpl);
}

void handler::depends_on(const std::vector<event> &Events) {
  for (const event &Event : Events) {
    depends_on(Event);
  }
}
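
// Illustrative usage sketch (not part of the original file): expressing an
// explicit dependency between two otherwise independent submissions.
//
//   sycl::queue Q;
//   sycl::event Produce = Q.submit([&](sycl::handler &CGH) {
//     CGH.single_task<class Producer>([] { /* write data */ });
//   });
//   Q.submit([&](sycl::handler &CGH) {
//     CGH.depends_on(Produce); // the consumer waits for the producer
//     CGH.single_task<class Consumer>([] { /* read data */ });
//   });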

static bool
checkContextSupports(const std::shared_ptr<detail::context_impl> &ContextImpl,
                     sycl::detail::pi::PiContextInfo InfoQuery) {
  auto &Plugin = ContextImpl->getPlugin();
  pi_bool SupportsOp = false;
  Plugin->call<detail::PiApiKind::piContextGetInfo>(ContextImpl->getHandleRef(),
                                                    InfoQuery, sizeof(pi_bool),
                                                    &SupportsOp, nullptr);
  return SupportsOp;
}

bool handler::supportsUSMMemcpy2D() {
  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
       {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
    if (QueueImpl &&
        !checkContextSupports(QueueImpl->getContextImplPtr(),
                              PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT))
      return false;
  }
  return true;
}

bool handler::supportsUSMFill2D() {
  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
       {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
    if (QueueImpl &&
        !checkContextSupports(QueueImpl->getContextImplPtr(),
                              PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT))
      return false;
  }
  return true;
}

bool handler::supportsUSMMemset2D() {
  for (const std::shared_ptr<detail::queue_impl> &QueueImpl :
       {MImpl->MSubmissionPrimaryQueue, MImpl->MSubmissionSecondaryQueue}) {
    if (QueueImpl &&
        !checkContextSupports(QueueImpl->getContextImplPtr(),
                              PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT))
      return false;
  }
  return true;
}

id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) {
  device Dev = MQueue->get_device();
  range<2> ItemLimit = Dev.get_info<info::device::max_work_item_sizes<2>>() *
                       Dev.get_info<info::device::max_compute_units>();
  return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)};
}
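
// Worked example (illustrative numbers, not from the original file): with
// max_work_item_sizes<2> = {256, 256} and max_compute_units = 4, ItemLimit is
// {1024, 1024}. A request of Width = 4096, Height = 512 then returns
// id<2>{min(1024, 512), min(1024, 4096)} = {512, 1024}, so the fallback
// kernel is bounded along the extent that exceeds the per-launch limit.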

void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr,
                                       size_t Size, bool Block) {
  MImpl->HostPipeName = Name.data();
  MImpl->HostPipePtr = Ptr;
  MImpl->HostPipeTypeSize = Size;
  MImpl->HostPipeBlocking = Block;
  MImpl->HostPipeRead = 1;
  setType(detail::CG::ReadWriteHostPipe);
}

void handler::ext_intel_write_host_pipe(detail::string_view Name, void *Ptr,
                                        size_t Size, bool Block) {
  MImpl->HostPipeName = Name.data();
  MImpl->HostPipePtr = Ptr;
  MImpl->HostPipeTypeSize = Size;
  MImpl->HostPipeBlocking = Block;
  MImpl->HostPipeRead = 0;
  setType(detail::CG::ReadWriteHostPipe);
}

void handler::memcpyToDeviceGlobal(const void *DeviceGlobalPtr, const void *Src,
                                   bool IsDeviceImageScoped, size_t NumBytes,
                                   size_t Offset) {
  throwIfActionIsCreated();
  MSrcPtr = const_cast<void *>(Src);
  MDstPtr = const_cast<void *>(DeviceGlobalPtr);
  MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
  MLength = NumBytes;
  MImpl->MOffset = Offset;
  setType(detail::CG::CopyToDeviceGlobal);
}

void handler::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
                                     bool IsDeviceImageScoped, size_t NumBytes,
                                     size_t Offset) {
  throwIfActionIsCreated();
  MSrcPtr = const_cast<void *>(DeviceGlobalPtr);
  MDstPtr = Dest;
  MImpl->MIsDeviceImageScoped = IsDeviceImageScoped;
  MLength = NumBytes;
  MImpl->MOffset = Offset;
  setType(detail::CG::CopyFromDeviceGlobal);
}

void handler::memcpyToHostOnlyDeviceGlobal(const void *DeviceGlobalPtr,
                                           const void *Src,
                                           size_t DeviceGlobalTSize,
                                           bool IsDeviceImageScoped,
                                           size_t NumBytes, size_t Offset) {
  std::weak_ptr<detail::context_impl> WeakContextImpl =
      MQueue->getContextImplPtr();
  std::weak_ptr<detail::device_impl> WeakDeviceImpl =
      MQueue->getDeviceImplPtr();
  host_task([=] {
    // Capture the context and device as weak pointers to avoid keeping them
    // alive for too long. If either is dead by the time this executes, the
    // operation would not have been visible anyway.
    std::shared_ptr<detail::context_impl> ContextImpl = WeakContextImpl.lock();
    std::shared_ptr<detail::device_impl> DeviceImpl = WeakDeviceImpl.lock();
    if (ContextImpl && DeviceImpl)
      ContextImpl->memcpyToHostOnlyDeviceGlobal(
          DeviceImpl, DeviceGlobalPtr, Src, DeviceGlobalTSize,
          IsDeviceImageScoped, NumBytes, Offset);
  });
}

void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest,
                                             const void *DeviceGlobalPtr,
                                             bool IsDeviceImageScoped,
                                             size_t NumBytes, size_t Offset) {
  const std::shared_ptr<detail::context_impl> &ContextImpl =
      MQueue->getContextImplPtr();
  const std::shared_ptr<detail::device_impl> &DeviceImpl =
      MQueue->getDeviceImplPtr();
  host_task([=] {
    // Unlike memcpy to a device_global, we need to keep the context and
    // device alive in the capture of this operation, as we must be able to
    // correctly copy the value to the user-specified pointer.
    ContextImpl->memcpyFromHostOnlyDeviceGlobal(
        DeviceImpl, Dest, DeviceGlobalPtr, IsDeviceImageScoped, NumBytes,
        Offset);
  });
}

const std::shared_ptr<detail::context_impl> &
handler::getContextImplPtr() const {
  return MQueue->getContextImplPtr();
}

void handler::setKernelCacheConfig(
    sycl::detail::pi::PiKernelCacheConfig Config) {
  MImpl->MKernelCacheConfig = Config;
}

void handler::setKernelIsCooperative(bool KernelIsCooperative) {
  MImpl->MKernelIsCooperative = KernelIsCooperative;
}

void handler::ext_oneapi_graph(
    ext::oneapi::experimental::command_graph<
        ext::oneapi::experimental::graph_state::executable>
        Graph) {
  MCGType = detail::CG::ExecCommandBuffer;
  MExecGraph = detail::getSyclObjImpl(Graph);
}

std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
handler::getCommandGraph() const {
  if (MGraph) {
    return MGraph;
  }
  return MQueue->getCommandGraph();
}

void handler::setUserFacingNodeType(ext::oneapi::experimental::node_type Type) {
  MImpl->MUserFacingNodeType = Type;
}

std::optional<std::array<size_t, 3>> handler::getMaxWorkGroups() {
  auto Dev = detail::getDeviceImplFromHandler(*this);
  std::array<size_t, 3> PiResult = {};
  auto Ret = Dev->getPlugin()->call_nocheck<PiApiKind::piDeviceGetInfo>(
      Dev->getHandleRef(),
      PiInfoCode<
          ext::oneapi::experimental::info::device::max_work_groups<3>>::value,
      sizeof(PiResult), &PiResult, nullptr);
  if (Ret == PI_SUCCESS) {
    return PiResult;
  }
  return {};
}

std::tuple<std::array<size_t, 3>, bool> handler::getMaxWorkGroups_v2() {
  auto ImmRess = getMaxWorkGroups();
  if (ImmRess)
    return {*ImmRess, true};
  return {std::array<size_t, 3>{0, 0, 0}, false};
}

void handler::setNDRangeUsed(bool Value) { MImpl->MNDRangeUsed = Value; }

void handler::registerDynamicParameter(
    ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase,
    int ArgIndex) {
  if (MQueue && MQueue->getCommandGraph()) {
    throw sycl::exception(
        make_error_code(errc::invalid),
        "Dynamic Parameters cannot be used with Graph Queue recording.");
  }
  if (!MGraph) {
    throw sycl::exception(
        make_error_code(errc::invalid),
        "Dynamic Parameters cannot be used with normal SYCL submissions");
  }

  auto ParamImpl = detail::getSyclObjImpl(DynamicParamBase);
  if (ParamImpl->MGraph != this->MGraph) {
    throw sycl::exception(
        make_error_code(errc::invalid),
        "Cannot use a Dynamic Parameter with a node associated with a graph "
        "other than the one it was created with.");
  }
  MImpl->MDynamicParameters.emplace_back(ParamImpl.get(), ArgIndex);
}
} // namespace _V1
} // namespace sycl
Definition: commands.cpp:2558
kernel_id get_kernel_id_impl(string_view KernelName)
sycl::detail::pi::PiMemImageChannelType convertChannelType(image_channel_type Type)
Definition: image_impl.cpp:187
std::shared_ptr< AccessorImplHost > AccessorImplPtr
Definition: accessor.hpp:532
sycl::detail::pi::PiImageCopyFlags getPiImageCopyFlags(sycl::usm::alloc SrcPtrType, sycl::usm::alloc DstPtrType)
Definition: handler.cpp:47
node_type getNodeTypeFromCG(sycl::detail::CG::CGTYPE CGType)
Definition: graph_impl.hpp:41
@ executable
In executable state, the graph is ready to execute.
__DPCPP_SYCL_EXTERNAL _SYCL_EXT_CPLX_INLINE_VISIBILITY std::enable_if_t< is_genfloat< _Tp >::value, _Tp > arg(const complex< _Tp > &__c)
constexpr size_t MaxNumAdditionalArgs
Definition: handler.cpp:835
kernel_bundle< bundle_state::executable > build(const kernel_bundle< bundle_state::input > &InputBundle, const std::vector< device > &Devs, const property_list &PropList={})
image_target
Definition: access.hpp:74
usm::alloc get_pointer_type(const void *ptr, const context &ctxt)
Query the allocation type from a USM pointer.
Definition: usm_impl.cpp:575
static bool checkContextSupports(const std::shared_ptr< detail::context_impl > &ContextImpl, sycl::detail::pi::PiContextInfo InfoQuery)
Definition: handler.cpp:1405
std::error_code make_error_code(sycl::errc E) noexcept
Constructs an error code using e and sycl_category()
Definition: exception.cpp:87
static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, size_t &IndexShift, int Size, bool IsKernelCreatedFromSource, size_t GlobalSize, std::vector< detail::ArgDesc > &Args, bool isESIMD)
Definition: handler.cpp:654
Definition: access.hpp:18
_pi_kernel_cache_config
Definition: pi.h:808
int32_t pi_int32
Definition: pi.h:212
pi_uint32 pi_bool
Definition: pi.h:215
_pi_mem_advice
Definition: pi.h:599
pi_result piEnqueueKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:537
pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Returns requested info for provided native device Return PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT fo...
Definition: pi_cuda.cpp:78
_pi_image_copy_flags
Definition: pi.h:676
@ PI_IMAGE_COPY_DEVICE_TO_DEVICE
Definition: pi.h:679
@ PI_IMAGE_COPY_DEVICE_TO_HOST
Definition: pi.h:678
@ PI_IMAGE_COPY_HOST_TO_DEVICE
Definition: pi.h:677
@ PI_MEM_TYPE_IMAGE_CUBEMAP
Definition: pi.h:596
@ PI_MEM_TYPE_IMAGE1D
Definition: pi.h:593
@ PI_MEM_TYPE_IMAGE1D_ARRAY
Definition: pi.h:594
@ PI_MEM_TYPE_IMAGE2D
Definition: pi.h:590
@ PI_MEM_TYPE_IMAGE2D_ARRAY
Definition: pi.h:592
@ PI_MEM_TYPE_IMAGE3D
Definition: pi.h:591
pi_result piextEnqueueCooperativeKernelLaunch(pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event)
Definition: pi_cuda.cpp:547
_pi_context_info
Definition: pi.h:479
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT
Definition: pi.h:492
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT
Definition: pi.h:493
@ PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT
Definition: pi.h:491
pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret)
Definition: pi_cuda.cpp:124
C++ wrapper of extern "C" PI interfaces.
size_t image_height
Definition: pi.h:1170
size_t image_row_pitch
Definition: pi.h:1173
size_t image_depth
Definition: pi.h:1171
size_t image_width
Definition: pi.h:1169
pi_mem_type image_type
Definition: pi.h:1168
size_t image_array_size
Definition: pi.h:1172
pi_image_channel_type image_channel_data_type
Definition: pi.h:1164
pi_image_channel_order image_channel_order
Definition: pi.h:1163
std::vector< detail::AccessorImplPtr > MAccStorage
Storage for accessors.
Definition: cg.hpp:101
std::vector< detail::EventImplPtr > MEvents
List of events that order the execution of this CG.
Definition: cg.hpp:109
std::vector< AccessorImplHost * > MRequirements
List of requirements that specify which memory is needed for the command group to be executed.
Definition: cg.hpp:107
A struct to describe the properties of an image.